blob: 4744781ee75378d7c597e11c8fde69899e8ed7ca [file] [log] [blame]
#!/usr/bin/env python
##
## Name: mkdoc.py
## Purpose: Extract documentation from header files.
##
## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
##
## Usage: mkdoc.py <template> <output>
##
from __future__ import print_function
import collections, re, sys
# A regular expression to match commented declarations.
# This is specific to C and not very general; it should work fine for the imath
# headers but will not adapt well to arbitrary code or to C++.
# The "text" group captures the body of a /** ... */ comment, and the "decl"
# group captures what follows the comment, up to a ";" at the end of a line or
# an opening "{".
doc = re.compile(r'''(?mx)/\*\* # open /**
(?P<text>(?:[^*]|\*[^/])*) # text Does a thing
\*/\n # close */
(?P<decl>[^;{]*(?:;$|\{))''') # decl void f(x);
# A regular expression matching up to 4 spaces at the head of a line.
spc = re.compile(r'(?m)^ {1,4}')
# A regular expression matching an insertion point. An insertion point has the
# form {{insert "header" name ...}}. If no names are given, all the names in
# the given header are inserted.
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')
# A regular expression matching non-identifier characters, for splitting.
nid = re.compile(r'\W+')
# A cache of already-parsed files, maps filename to declarations.
CACHE = {}
def last_word(s):
    """Returns the trailing identifier-shaped token of s.

    Surrounding whitespace is ignored and the remainder is split on runs of
    non-identifier characters; the final piece is the result. That piece is
    the empty string when the stripped text is empty or ends with a
    non-identifier character.
    """
    tokens = nid.split(s.strip())
    return tokens.pop()
def typeset(text):
    """Renders text with verbatim sections into markdown.

    Runs of lines that begin with a space are wrapped in ``` fences. The
    first output line is then turned into a list item, and every other
    non-empty line is indented beneath it.
    """
    rendered = []
    in_code = False
    for raw in text.split('\n'):
        indented = raw.startswith(' ')
        # Emit a fence whenever we cross into or out of a verbatim run.
        if indented != in_code:
            rendered.append('```')
            in_code = indented
        rendered.append(raw)
    # Close a verbatim run that extends to the end of the text.
    if in_code:
        rendered.append('```')

    # str.split always yields at least one piece, so there is a first line.
    result = [' - ' + rendered[0]]
    result.extend(' ' + item if item else item for item in rendered[1:])
    return '\n'.join(result)
class LIndex(object):
    """Represents a line offset index for text.

    The index records the offset of every line break in the text, so that a
    character offset can be converted to a (line, column) pair by binary
    search.
    """

    def __init__(self, text):
        """Builds the index for text.

        Params:
          text: the string to be indexed
        """
        # An array of ending offsets for each line, with a sentinel at
        # position 0 to make the index arithmetic easier.
        idx = [0]

        # Scan forward for newlines, and push the offsets of the line breaks
        # onto the list so we can binary search them later.
        brk = text.find('\n')
        while brk >= 0:
            idx.append(brk)
            brk = text.find('\n', brk + 1)

        # Whatever follows the last line break ends at EOF.
        if idx[-1] < len(text):
            idx.append(len(text))
        self._len = len(text)
        self._index = idx

    def linecol(self, pos):
        """Returns the (line, col) corresponding to pos.

        Line numbers are 1-based, columns are 0-based. A newline character is
        reported as the last column of the line it terminates.

        Params:
          pos: a character offset into the indexed text, 0 <= pos <= len(text)

        Raises:
          IndexError: if pos is outside the indexed text
        """
        if pos < 0 or pos > self._len:
            raise IndexError("position %d out of range" % pos)

        # Binary search for the line whose end marker is at or after pos and
        # whose previous line's end is before pos.
        idx = self._index
        i, j = 1, len(idx)
        while i < j:
            # Floor division: "/" produces a float on Python 3, which is not
            # a valid list index.
            m = (i + j) // 2
            if idx[m] < pos:
                i = m + 1
            elif idx[m - 1] < pos:
                # The first line starts at offset 0; every later line starts
                # one past the line break recorded at idx[m - 1].
                start = 0 if m == 1 else idx[m - 1] + 1
                return m, pos - start
            else:
                j = m

        # Only reached for pos == 0, which no line end precedes.
        return 1, pos
class Decl(object):
    """Represents a single documented declaration."""

    def __init__(self, com, decl, line=None):
        """Initialize a new documented declaration.

        Params:
          com: the raw text of the comment
          decl: the raw text of the declaration
          line: the line number of the declaration
        """
        # The name is the last identifier before the parameter list, or the
        # last identifier of the whole declaration when there is none.
        lp = decl.find('(')
        if lp < 0:
            self.name = last_word(decl.rstrip(';'))
        else:
            self.name = last_word(decl[:lp])

        # Drop the terminator and collapse all whitespace to single spaces.
        self.decl = ' '.join(decl.rstrip(';{').strip().split())
        self.comment = spc.sub('', com.rstrip())
        self.line = line

    def __repr__(self):
        return '#Decl["%s"]' % self.decl

    def _linked_decl(self, path):
        """Returns the declaration with its name wrapped in a link to path."""
        # The name is the last identifier ahead of the first "(" (or of the
        # whole declaration), so search backward from there. Searching forward
        # from the start would match "id" inside "void" in "void id(void)".
        lp = self.decl.find('(')
        head = self.decl if lp < 0 else self.decl[:lp]
        pos = head.rfind(self.name) if self.name else 0
        if pos < 0:
            # Fall back to a plain search, which raises ValueError when the
            # name does not occur in the declaration at all.
            pos = self.decl.index(self.name)

        # Only add a line fragment when the line number is known.
        if self.line is None:
            href = path
        else:
            href = '%s#L%d' % (path, self.line)
        return '%s<a href="%s">%s</a>%s' % (
            self.decl[:pos],
            href,
            self.name,
            self.decl[pos + len(self.name):],
        )

    def markdown(self, path):
        """Renders the declaration and its comment as a markdown section.

        Params:
          path: the link target for the declaration's name; "#L<line>" is
                appended when the line number is known

        Returns:
          The rendered text, with the declaration in a <pre> block followed
          by the typeset comment.
        """
        return '''------------
<a id="{name}"></a><pre>
{decl};
</pre>
{comment}
'''.format(name=self.name,
           decl=self._linked_decl(path),
           comment=typeset(self.comment))
def parse_decls(text):
    """Parse a dictionary of declarations from text.

    Returns an ordered dictionary mapping each declaration's name to its
    Decl, in order of first appearance. When a name occurs more than once,
    the last declaration with that name is the one kept.
    """
    offsets = LIndex(text)

    def entries():
        # Yield a (name, Decl) pair for every documented declaration.
        for hit in doc.finditer(text):
            lineno = offsets.linecol(hit.start('decl'))[0]
            found = Decl(hit.group('text'), hit.group('decl'), lineno)
            yield found.name, found

    return collections.OrderedDict(entries())
def load_file(path):
    """Load declarations from path, or use cached results.

    Params:
      path: the name of the header file to read

    Returns:
      The result of parse_decls for the contents of the file. The result is
      stored in CACHE, so each path is read and parsed at most once.
    """
    if path not in CACHE:
        # The file() builtin exists only in Python 2, so use open(). The 'U'
        # flag is needed for newline translation on Python 2 only; Python 3
        # text mode translates newlines by default and rejects 'U' from 3.11.
        mode = 'rU' if sys.version_info[0] < 3 else 'r'
        with open(path, mode) as fp:
            CACHE[path] = parse_decls(fp.read())
    return CACHE[path]
def main(args):
    """Expands the insertion points of a template into an output file.

    Params:
      args: a list of exactly two strings, the path of the template to read
            and the path of the markdown file to write

    The output begins with a generated-file notice. The template text is
    copied through unchanged, except that each insertion point is replaced
    by the rendered declarations it names. Prints a usage message and exits
    with status 1 if args does not have exactly two elements.
    """
    if len(args) != 2:
        print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
        sys.exit(1)

    doc_template = args[0]
    doc_markdown = args[1]

    # The file() builtin exists only in Python 2, so use open(). The 'U' flag
    # is needed for newline translation on Python 2 only; Python 3 text mode
    # translates newlines by default and rejects 'U' from 3.11.
    read_mode = 'rU' if sys.version_info[0] < 3 else 'r'
    with open(doc_template, read_mode) as source:
        template = source.read()

    with open(doc_markdown, 'wt') as output:
        print(
            '''<!--
This file was generated from "{0}" by mkdoc.py
DO NOT EDIT
-->
'''.format(doc_template),
            file=output)

        pos = 0  # last position of input copied

        # Look for substitution markers in the template, and replace them
        # with their content.
        for ip in ins.finditer(template):
            output.write(template[pos:ip.start()])
            pos = ip.end()

            decls = load_file(ip.group('file'))
            if ip.group('names'):  # pick the selected names, in order
                decls = collections.OrderedDict(
                    (key, decls[key])
                    for key in ip.group('names').strip().split())

            # Render the selected declarations.
            for decl in decls.values():
                print(decl.markdown(ip.group('file')), file=output)

        # Clean up any remaining template bits
        output.write(template[pos:])


if __name__ == "__main__":
    main(sys.argv[1:])