#!/usr/bin/env python3
# A tool to automatically generate documentation for the config options of the
# clang static analyzer by reading `AnalyzerOptions.def`.
| |
| import argparse |
| from collections import namedtuple |
| from enum import Enum, auto |
| import re |
| import sys |
| import textwrap |
| |
| |
# The following code implements a trivial parser for the narrow subset of C++
# which is used in AnalyzerOptions.def. This supports the following features:
# - ignores preprocessor directives, even if they are continued with \ at EOL
# - ignores comments: both /* ... */ and // ...
# - parses string literals (even if they contain \" escapes)
# - concatenates adjacent string literals
# - parses numbers even if they contain ' as a thousands separator
# - recognizes MACRO(arg1, arg2, ..., argN) calls
| |
| |
class TT(Enum):
    """Token type enum: the kinds of tokens that the tokenizer emits."""

    number = auto()  # digits, optionally with a leading - and ' separators
    ident = auto()  # a run of word characters
    string = auto()  # double-quoted string literal, quotes included
    punct = auto()  # one of ( ) ,


# Ordered list of (compiled regex, token kind) pairs. The patterns are tried in
# this order at the current position and the first match wins; a kind of None
# means that the matched text is skipped without producing a token.
TOKENS = [
    # The number pattern precedes the identifier pattern because \w also
    # matches digits.
    (re.compile(r"-?[0-9']+"), TT.number),
    (re.compile(r"\w+"), TT.ident),
    (re.compile(r'"([^\\"]|\\.)*"'), TT.string),
    (re.compile(r"[(),]"), TT.punct),
    (re.compile(r"/\*((?!\*/).)*\*/", re.S), None),  # C-style comment
    # The next two patterns accept end-of-input (\Z) in place of the closing
    # newline, so a file without a final newline is not reported as a series
    # of unexpected characters.
    (re.compile(r"//.*(?:\n|\Z)"), None),  # C++ style oneline comment
    (re.compile(r"#.*(\\\n.*)*(?<!\\)(?:\n|\Z)"), None),  # preprocessor directive
    (re.compile(r"\s+"), None),  # whitespace
]

# A lexed token: `kind` is a TT member, `code` is the exact matched source text.
Token = namedtuple("Token", "kind code")
| |
| |
class ErrorHandler:
    """Prints error reports to stderr and tracks which textual tweaks were used."""

    def __init__(self):
        # Set to True by the first reported error and never reset.
        self.seen_errors = False

        # This script uses some heuristic tweaks to modify the documentation
        # of some analyzer options. As this code is fragile, we record the use
        # of these tweaks and report them if they become obsolete:
        self.unused_tweaks = [
            "escape star",
            "escape underline",
            "accepted values",
            "example file content",
        ]

    def record_use_of_tweak(self, tweak_name):
        """Drop `tweak_name` from the unused list; absent names are ignored."""
        if tweak_name in self.unused_tweaks:
            self.unused_tweaks.remove(tweak_name)

    def replace_as_tweak(self, string, pattern, repl, tweak_name):
        """Return `string` with `pattern` replaced by `repl`.

        The tweak is recorded as used only if the replacement changed the text.
        """
        replaced = string.replace(pattern, repl)
        changed = replaced != string
        if changed:
            self.record_use_of_tweak(tweak_name)
        return replaced

    def report_error(self, msg):
        """Print `msg` to stderr with an "Error:" prefix and remember the failure."""
        print("Error:", msg, file=sys.stderr)
        self.seen_errors = True

    def report_unexpected_char(self, s, pos):
        """Report the character at index `pos` of `s` with its 1-based line and column."""
        # Line number: one more than the count of newlines before `pos`.
        lineno = s.count("\n", 0, pos) + 1
        # Column: distance from the preceding newline (rfind gives -1 if none).
        col = pos - s.rfind("\n", 0, pos)
        self.report_error(
            "unexpected character %r in AnalyzerOptions.def at line %d column %d"
            % (s[pos], lineno, col),
        )

    def report_unused_tweaks(self):
        """Report an error that lists the tweaks which were never recorded as used."""
        leftover = self.unused_tweaks
        if leftover:
            _is = "s are" if len(leftover) != 1 else " is"
            names = ", ".join(leftover)
            self.report_error(f"textual tweak{_is} unused in script: {names}")


# The single error handler instance shared by the whole script.
err_handler = ErrorHandler()
| |
| |
def tokenize(s):
    """Split the text `s` into a list of Token objects.

    At every position the patterns in TOKENS are tried in order and the first
    match is consumed; matches whose kind is None (comments, preprocessor
    directives, whitespace) produce no token. A character that no pattern
    matches is reported through err_handler and skipped.
    """
    found = []
    cursor, length = 0, len(s)
    while cursor < length:
        hit = None
        for pattern, token_type in TOKENS:
            hit = pattern.match(s, cursor)
            if hit:
                break
        if hit is None:
            # No pattern applies here: complain and resume at the next char.
            err_handler.report_unexpected_char(s, cursor)
            cursor += 1
            continue
        if token_type is not None:
            found.append(Token(token_type, hit.group(0)))
        cursor = hit.end()
    return found
| |
| |
def join_strings(tokens):
    """Return the tokens with each run of adjacent string tokens merged into one."""
    merged = []
    for token in tokens:
        follows_string = bool(merged) and merged[-1].kind == TT.string
        if not (token.kind == TT.string and follows_string):
            merged.append(token)
            continue
        # Two string literals in a row: glue them together, dropping the
        # closing " of the earlier one and the opening " of the later one.
        earlier = merged[-1]
        merged[-1] = Token(TT.string, earlier.code[:-1] + token.code[1:])
    return merged
| |
| |
# A recognized macro invocation: `name` is the macro identifier and `args` is
# the list that collects the argument tokens in source order.
MacroCall = namedtuple("MacroCall", ["name", "args"])
| |
| |
class State(Enum):
    "States of the state machine used for parsing the macro calls."

    # The explicit numbers are the ones auto() would assign (1, 2, 3, ...).
    init = 1  # not inside a recognized macro call
    after_ident = 2  # a macro name was read, "(" must follow
    before_arg = 3  # the next token is taken as an argument
    after_arg = 4  # an argument was read, "," or ")" should follow
| |
| |
def get_calls(tokens, macro_names):
    """Extract the calls of the macros in `macro_names` from `tokens`.

    Recognizes the shape NAME ( arg , arg , ... ) where NAME is an identifier
    contained in `macro_names` and every argument is exactly one token.
    Returns the completed calls as a list of MacroCall objects; a call that
    deviates from this shape is silently abandoned.
    """
    open_paren = Token(TT.punct, "(")
    calls = []
    pending = None  # the MacroCall being assembled, None outside of a call
    state = State.init
    for tok in tokens:
        if state == State.init:
            if tok.kind == TT.ident and tok.code in macro_names:
                pending = MacroCall(tok.code, [])
                state = State.after_ident
                continue
        elif state == State.after_ident:
            if tok == open_paren:
                state = State.before_arg
                continue
        elif state == State.before_arg:
            # Whatever token stands here is accepted as the argument.
            if pending is not None:
                pending.args.append(tok)
                state = State.after_arg
            continue
        elif state == State.after_arg:
            if tok.kind == TT.punct:
                if tok.code == ")":
                    calls.append(pending)
                    pending = None
                    state = State.init
                elif tok.code == ",":
                    state = State.before_arg
                # Any other punctuation leaves the state unchanged.
                continue
        # The token does not fit the current state: drop the partial call (if
        # any) and start looking for a macro name again.
        pending = None
        state = State.init
    return calls
| |
| |
# The information will be extracted from calls to these two macros:
# #define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL)
# #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC,
#                                              SHALLOW_VAL, DEEP_VAL)
#
# Maps each macro name to the number of arguments that its calls must have.
MACRO_NAMES_PARAMCOUNTS = {
    # TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL
    "ANALYZER_OPTION": 5,
    # TYPE, NAME, CMDFLAG, DESC, SHALLOW_VAL, DEEP_VAL
    "ANALYZER_OPTION_DEPENDS_ON_USER_MODE": 6,
}
| |
| |
def string_value(tok):
    """Return the text of a string token without quotes and escaping backslashes.

    Each backslash escape is replaced by the character that follows the
    backslash. Raises ValueError if `tok` is not a string token.
    """
    if tok.kind == TT.string:
        inner = tok.code[1:-1]  # strip the surrounding quotes
        return re.sub(r"\\(.)", r"\1", inner)
    raise ValueError(f"expected a string token, got {tok.kind.name}")
| |
| |
def cmdflag_to_rst_title(cmdflag_tok):
    """Build the RST link target and dash-underlined title for an option flag.

    Raises ValueError (from string_value) if the token is not a string.
    """
    text = string_value(cmdflag_tok)
    lines = [
        f".. _analyzer-option-{text}:",
        "",
        text,
        "-" * len(text),  # the underline has the same length as the title
        "",
        "",
    ]
    return "\n".join(lines)
| |
| |
def desc_to_rst_paragraphs(tok):
    """Convert an option description token into RST paragraphs.

    The result is the description wrapped to 80 columns, every paragraph
    followed by a blank line. Raises ValueError (from string_value) if `tok` is
    not a string token. Each textual tweak that is applied gets recorded in
    err_handler.
    """
    desc = string_value(tok)

    # Escape some characters that have special meaning in RST:
    for plain, escaped, tweak in (
        ("*", r"\*", "escape star"),
        ("_", r"\_", "escape underline"),
    ):
        desc = err_handler.replace_as_tweak(desc, plain, escaped, tweak)

    # Many descriptions end with "Value: <list of accepted values>", which is
    # OK for a terse command line printout, but should be prettified for web
    # documentation.
    # Moreover, the option ctu-invocation-list shows some example file content
    # which is formatted as a preformatted block.
    value_m = re.search(r"(^|\s)Value:", desc)
    # The example pattern is only consulted when there is no "Value:" part.
    example_m = None if value_m else re.search(r"\s*Example file.content:", desc)

    extra = ""
    if value_m:
        err_handler.record_use_of_tweak("accepted values")
        paragraphs = [
            desc[: value_m.start()],
            "Accepted values:" + desc[value_m.end() :],
        ]
    elif example_m:
        err_handler.record_use_of_tweak("example file content")
        paragraphs = [desc[: example_m.start()]]
        extra = "Example file content::\n\n " + desc[example_m.end() :] + "\n\n"
    else:
        paragraphs = [desc]

    # Blank paragraphs are dropped; the preformatted block is not wrapped.
    filled = (textwrap.fill(p, width=80) for p in paragraphs if p.strip())
    return "".join(block + "\n\n" for block in filled) + extra
| |
| |
def default_to_rst(tok):
    """Render one default value token as text for the documentation.

    Numbers lose their ' separators, identifiers and non-empty string literals
    are shown exactly as written in the source (strings keep their quotes),
    and the empty string literal is spelled out in words. Raises ValueError
    for any other kind of token.
    """
    kind, code = tok.kind, tok.code
    if kind == TT.number:
        return code.replace("'", "")
    if kind == TT.ident:
        return code
    if kind == TT.string:
        return "(empty string)" if code == '""' else code
    raise ValueError(f"unexpected token as default value: {tok.kind.name}")
| |
| |
def defaults_to_rst_paragraph(defaults):
    """Build the "Default value: ..." paragraph from one or two default tokens.

    A single token yields a plain default; two tokens are presented as the
    shallow mode and deep mode defaults, in this order. Raises ValueError for
    any other count, or (from default_to_rst) for an unsupported token.
    """
    # Convert first, so that a bad token is reported before a bad count.
    strs = list(map(default_to_rst, defaults))

    if len(strs) not in (1, 2):
        raise ValueError("unexpected count of default values: %d" % len(defaults))
    if len(strs) == 2:
        return (
            f"Default value: {strs[0]} (in shallow mode) / {strs[1]} (in deep mode)\n\n"
        )
    return f"Default value: {strs[0]}\n\n"
| |
| |
def macro_call_to_rst_paragraphs(macro_call):
    """Produce the complete RST section (title, description, default) of an option.

    If the call has the wrong number of arguments or one of its tokens cannot
    be converted, the problem is reported through err_handler and an empty
    string is returned.
    """
    try:
        arg_count = len(macro_call.args)
        param_count = MACRO_NAMES_PARAMCOUNTS[macro_call.name]
        if arg_count != param_count:
            raise ValueError(
                f"expected {param_count} arguments for {macro_call.name}, found {arg_count}"
            )

        # The leading TYPE and NAME arguments are not used in the docs;
        # everything after DESC is a default value.
        cmdflag, desc = macro_call.args[2:4]
        defaults = macro_call.args[4:]

        pieces = (
            cmdflag_to_rst_title(cmdflag),
            desc_to_rst_paragraphs(desc),
            defaults_to_rst_paragraph(defaults),
        )
    except ValueError as ve:
        err_handler.report_error(ve.args[0])
        return ""
    return "".join(pieces)
| |
| |
def get_option_list(input_file):
    """Read the file at `input_file` and return the RST text of all its options.

    The file is decoded as UTF-8, tokenized, and searched for calls of the
    macros listed in MACRO_NAMES_PARAMCOUNTS; the sections generated for the
    calls are concatenated in the order of their appearance.
    """
    with open(input_file, encoding="utf-8") as f:
        source = f.read()
    calls = get_calls(join_strings(tokenize(source)), MACRO_NAMES_PARAMCOUNTS)
    return "".join(map(macro_call_to_rst_paragraphs, calls))
| |
| |
# Script body: read the template, substitute the generated list of options for
# the placeholder line, write the result, and exit with status 1 if any error
# was reported along the way.
p = argparse.ArgumentParser()
# All three paths are mandatory. Marking them as required lets argparse print
# a usage error for a missing option; otherwise open(None) raises a TypeError.
p.add_argument("--options-def", required=True, help="path to AnalyzerOptions.def")
p.add_argument("--template", required=True, help="template file")
p.add_argument("--out", required=True, help="output file")
opts = p.parse_args()

with open(opts.template, encoding="utf-8") as f:
    doc_template = f.read()

PLACEHOLDER = ".. OPTIONS_LIST_PLACEHOLDER\n"

# Without the placeholder str.replace() is a no-op and the generated document
# would silently lack the options, so treat this as an error.
if PLACEHOLDER not in doc_template:
    err_handler.report_error(
        f"placeholder line {PLACEHOLDER.strip()!r} not found in the template file"
    )

rst_output = doc_template.replace(PLACEHOLDER, get_option_list(opts.options_def))

err_handler.report_unused_tweaks()

# The output is written even if errors were seen; the exit status signals the
# failure. newline="" disables newline translation, so "\n" is written as-is.
with open(opts.out, "w", newline="", encoding="utf-8") as f:
    f.write(rst_output)

if err_handler.seen_errors:
    sys.exit(1)