#!/usr/bin/env python3
#
# This script parses the DejaGNU annotations from the files in the gfortran test
# suite and updates the static test configuration files. This must
# be run whenever the tests are updated with new tests from upstream
# gfortran. There are currently several limitations in the way the annotations
# are parsed and how they are dealt with in the static test
# configuration. These are described in inline comments. The format of the
# static test configuration files is also documented inline.
#
# This script modifies the test configuration files in place. If this is not
# desirable,
#
# update-test-config.py -h
#
# will provide some other options.
import argparse
import chardet
import os
import re
import shutil
import typing
# A single test, holding exactly the fields that get serialized into the static
# test configuration: the test kind, the sources and the flags as determined
# directly from the DejaGNU annotations in the corresponding source file(s).
# Any custom handling of a test, e.g. running it conditionally on some
# platform, must not appear here or in the generated configuration file.
class Test:
    def __init__(
        self,
        kind: str,
        sources: list[str],
        options: list[str],
        enabled_on: list[str],
        disabled_on: list[str],
        expected_fail: bool
    ):
        # One of 'preprocess', 'assemble', 'compile', 'link' or 'run'.
        self.kind: str = kind

        # Basenames of the files needed by the test (all dependent files live
        # in the same directory, so no path is needed). There is at least one
        # element; the first is the "main" file and the rest are listed in the
        # order in which they should be compiled.
        self.sources: list[str] = sources

        # Command-line flags needed to build the test.
        #
        # FIXME: Currently, only the flags from the main file in multi-file
        # tests are recorded. This might need to be fixed.
        self.options: list[str] = options

        # Optional targets on which the test should be run. DejaGNU target
        # annotations can be fairly complex (wildcards, logical operators),
        # but only "simple" targets are likely to ever be handled.
        self.enabled_on: list[str] = enabled_on

        # Targets for which the test should be excluded.
        self.disabled_on: list[str] = disabled_on

        # Whether the test is expected to fail. For run tests this means a
        # shouldfail annotation is present; for every other kind it means a
        # dg-error annotation appears somewhere in the file. In the latter
        # case the error may only manifest on certain targets, but that should
        # already have been captured in self.enabled_on.
        self.xfail: bool = expected_fail

    def __eq__(self, other):
        if not isinstance(other, Test):
            return NotImplemented
        # Two tests are equal when every serialized field is equal.
        compared = (
            'kind', 'sources', 'options', 'xfail', 'enabled_on', 'disabled_on'
        )
        return all(
            getattr(self, name) == getattr(other, name) for name in compared
        )

    def __str__(self):
        # Serialize as <kind>;<sources>;<xfail>;<options>;<enabled>;<disabled>
        # with list fields joined by single spaces.
        fields = [
            self.kind,
            ' '.join(self.sources),
            'xfail' if self.xfail else '',
            ' '.join(self.options),
            ' '.join(self.enabled_on),
            ' '.join(self.disabled_on),
        ]
        return ';'.join(fields)
# Building blocks for the annotation patterns below. These are plain strings
# that get spliced into the larger regexes which are compiled further down.
#
# An opening brace followed by any number of spaces.
pfx = '[{][ ]*'
# Any number of spaces followed by a closing brace.
sfx = '[ ]*[}]'
# A non-greedy capture of an argument with an optional brace on either side.
# In DejaGNU, braces can be used instead of quotes ... I think.
res = '[{]?[ ]*(.+?)[ ]*[}]?'
# The target is always optional. It also can be fairly complex. This regex is
# definitely not right for the job, but it will catch the simple cases which is
# all we really intend to support anyway. The text after the 'target' keyword
# is available through the named group 'target'.
tgt = f'({pfx}target[ ]*(?P<target>.+){sfx})?'
# Text enclosed in one pair of braces. The capture excludes the braces and the
# spaces directly inside them.
re_btxt = re.compile('[{][ ]*(.+?)[ ]*[}]')
# File names that contain a '.' immediately followed by 'f' or 'F' after at
# least one leading character (e.g. foo.f90, foo.F).
re_fortran = re.compile('^.+[.][Ff].*$')
# One pattern per test kind. The assemble, link and run patterns also accept
# the dg-lto-do spelling. Only the compile, link and run patterns capture an
# optional target.
re_assemble = re.compile(f'{pfx}dg-(lto-)?do[ ]*assemble{sfx}')
re_preprocess = re.compile(f'{pfx}dg-do[ ]*preprocess{sfx}')
re_compile = re.compile(f'{pfx}dg-do[ ]*compile[ ]*{tgt}{sfx}')
re_link = re.compile(f'{pfx}dg-(lto-)?do[ ]*link[ ]*{tgt}{sfx}')
re_run = re.compile(f'{pfx}dg-(lto-)?do[ ]*run[ ]*{tgt}{sfx}')
# Annotations naming extra files that belong to a test. Group 1 holds the
# (possibly quoted) list of file names.
re_sources = re.compile(f'{pfx}dg-additional-sources[ ]*{res}{sfx}')
re_aux_modules = re.compile(f'{pfx}dg-compile-aux-modules[ ]*{res}{sfx}')
# Annotations carrying compiler/linker options. Group 1 holds the (possibly
# quoted) options and the named group 'target' holds the optional target.
re_opts = re.compile(f'{pfx}dg-options[ ]*{res}[ ]*{tgt}{sfx}')
re_addnl_opts = re.compile(f'{pfx}dg-additional-options[ ]*{res}[ ]*{tgt}{sfx}')
# The LTO options are a brace-enclosed list of brace-enclosed option sets.
# Group 1 holds the inner text starting at the first option set; the
# individual sets can be pulled out of it with re_btxt.
re_lto_opts = re.compile(
    f'{pfx}dg-lto-options[ ]*'
    f'[{{][ ]*({pfx}.+?{sfx}[ ]*)[ ]*[}}][ ]*'
    f'{tgt}{sfx}'
)
re_ld_opts = re.compile(f'{pfx}dg-extra-ld-options[ ]*{res}[ ]*{tgt}{sfx}')
# Annotations that mark a test as expected to fail.
re_shouldfail = re.compile(f'{pfx}dg-shouldfail[ ]*.*{sfx}')
re_error = re.compile(f'{pfx}dg-error[ ]*.*{tgt}{sfx}')
# A platform specification: exactly three dash-separated fields, each made up
# of letters, digits, '_' and the wildcard characters '*' and '?'.
re_platform = re.compile('^[A-Za-z0-9*?_]+-[A-Za-z0-9*?_]+-[A-Za-z0-9*?_]+$')
# Maps from known platforms to triples that LLVM will understand. The values
# are still written with DejaGNU-style wildcards; they are converted to
# regular expressions by convert_target_regex() when they are used.
# FIXME: The ia32 target probably does not always correspond to i386. Does it
# mean that it will be enabled on other non-X86 platforms?
platforms = {'ia32': 'i386-*-*'}
# Return the n-th level ancestor of the file f. Level 1 is the directory that
# contains the file, level 2 is the parent of that directory and so on. For
# n = 0, f itself is returned.
def get_ancestor(f: str, n: int) -> str:
    path = f
    # Each application of dirname climbs exactly one level.
    for _level in range(n):
        path = os.path.dirname(path)
    return path
# Get the encoding of the file as guessed by chardet from the raw bytes of the
# whole file. The result may be None; presumably chardet reports that when it
# cannot make a guess (confirm against the chardet documentation).
def get_encoding(filepath: str) -> str | None:
    with open(filepath, 'rb') as f:
        # The with block always returns, so no fallback return is needed
        # after it.
        return chardet.detect(f.read())['encoding']
# Get the lines in the file, decoded using the encoding reported by
# get_encoding(). This is best-effort: if the file cannot be opened or decoded,
# a warning is printed and an empty list is returned.
def get_lines(filepath: str) -> list[str]:
    try:
        encoding = get_encoding(filepath)
        with open(filepath, 'r', encoding = encoding) as f:
            return f.readlines()
    # OSError covers files that cannot be opened or read, LookupError an
    # encoding name that Python does not know, and ValueError (which includes
    # UnicodeDecodeError) contents that are invalid in the detected encoding.
    # Anything else, in particular KeyboardInterrupt, is deliberately allowed
    # to propagate instead of being silently discarded.
    except (OSError, LookupError, ValueError):
        warning('Could not open file: {}', os.path.basename(filepath))
        return []
# Collect the directories below the gfortran directory which may contain tests.
# The result lists the 'regression' directory followed by everything found
# beneath it, then the 'torture' directory followed by everything beneath it.
def get_subdirs(gfortran: str) -> list[str]:
    subdirs: list[str] = []
    for top in ('regression', 'torture'):
        base = os.path.join(gfortran, top)
        # The top-level directory itself is always listed, even if the walk
        # below finds nothing.
        subdirs.append(base)
        for parent, children, _ in os.walk(base):
            subdirs.extend(os.path.join(parent, child) for child in children)
    return subdirs
# Split a possibly quoted string into its whitespace-separated words. Leading
# and trailing whitespace is dropped first, then at most one double quote is
# removed from the start and at most one from the end.
def qsplit(s: str) -> list[str]:
    unquoted = s.strip().removeprefix('"').removesuffix('"')
    return unquoted.split()
# Remove one enclosing pair of braces from the string. The braces are only
# dropped when the string both starts with '{' and ends with '}'. Leading and
# trailing whitespace is always stripped, both outside and inside the braces.
def strip_braces(s: str) -> str:
    trimmed = s.strip()
    if not (trimmed.startswith('{') and trimmed.endswith('}')):
        return trimmed
    return trimmed[1:-1].strip()
# Search the line for the regex. On success, append the match object to the
# MOUT list and return True. On failure, return False and leave MOUT untouched.
def try_match(regex: re.Pattern, line: str, mout: list[re.Match[str]]) -> bool:
    found = regex.search(line)
    if found is None:
        return False
    mout.append(found)
    return True
# Count how many elements of the iterable make the predicate return a true
# value.
def count_if(l, predicate):
    matching = 0
    for element in l:
        if predicate(element):
            matching += 1
    return matching
# Format the arguments into the str.format-style template and print the
# result. This is only around to save a bit of typing.
def printf(fmt: str, *args) -> None:
    text = fmt.format(*args)
    print(text)
# Print an informational message. The arguments are substituted into the
# str.format-style template by printf().
def message(fmt: str, *args) -> None:
    printf(fmt, *args)
# Print a warning message, i.e. the formatted message prefixed with
# 'WARNING: '.
def warning(fmt: str, *args) -> None:
    printf(f'WARNING: {fmt}', *args)
# Print an error message, i.e. the formatted message prefixed with 'ERROR: ',
# and terminate the script with exit status 1. This function never returns.
def error(fmt: str, *args) -> typing.NoReturn:
    printf('ERROR: ' + fmt, *args)
    # Raise SystemExit directly instead of calling exit(). The exit() helper is
    # injected by the site module for interactive use and is not available
    # when the interpreter is started without it (e.g. python -S).
    raise SystemExit(1)
# The target is usually a wildcard pattern. The syntax used by DejaGNU is not
# the same as the regex syntax supported by cmake. This translates the DejaGNU
# pattern to a cmake-compatible regex.
def convert_target_regex(t: str) -> str:
    # XXX: This translation is not strictly correct.
    # In DejaGNU, the ? character matches a single character unless it follows
    # an atom. In the target specifications in the gfortran test suite, this is
    # only used as a single character match.
    t = t.replace('?', '.')
    # XXX: This translation is not strictly correct.
    # In DejaGNU, the * character can also be a wildcard match for zero or more
    # characters unless it follows an atom. In the target specifications in the
    # gfortran test suite, it is only used as a wildcard. It must become '.*'
    # and not '.+': a pattern such as powerpc*-*-* has to match a triple that
    # starts with plain 'powerpc-', where the wildcard matches nothing.
    t = t.replace('*', '.*')
    return t
# Parse the enabled targets from a target specification string. Some of the
# "targets" are really requests for additional compiler/linker options. The
# function returns the pair (targets, options) where targets is the list of
# target regexes and options is the list of flags derived from the
# specification. Unrecognized entries are reported with a warning and skipped.
def parse_enabled_targets(t: str) -> tuple[list[str], list[str]]:
    targets = []
    options = []

    # An expression can be wrapped with braces. While this seems to be necessary
    # for complex expressions, it can be used with simple expressions as well.
    t = strip_braces(t)

    # A simple expression may be a sequence of targets. Split on any run of
    # whitespace so that repeated spaces (or an empty specification) do not
    # produce empty entries that would be reported as unknown targets.
    for tgt in t.split():
        if re_platform.match(tgt):
            targets.append(convert_target_regex(tgt))
        elif tgt in platforms:
            targets.append(convert_target_regex(platforms[tgt]))
        # Some "targets" need to be translated to compiler/linker flags.
        elif tgt in ['fopenmp', 'fopenacc', 'pthread']:
            options.append('-' + tgt)
        elif tgt in ['c99_runtime']:
            options.append('-lc')
        elif tgt in [
            'fd_truncate',
            'fortran_large_int',
            'fortran_real_10',
            'fortran_real_16'
        ]:
            # FIXME: These may need something sane to be done.
            pass
        elif tgt in [
            'arm_eabi',
            'avx_runtime',
            'fpic',
            'libatomic_available',
            'vect_simd_clones'
        ]:
            # As far as I can tell, nothing needs to be done for these targets.
            pass
        else:
            warning('Unknown target: {}', tgt)
    return targets, options
# Parse the disabled targets from a target specification string and return
# them as a list of target regexes. Unrecognized entries are reported with a
# warning and skipped.
def parse_disabled_targets(t: str) -> list[str]:
    targets = []

    # An expression can be wrapped with braces. While this seems to be necessary
    # for complex expressions, it can be used with simple expressions as well.
    t = strip_braces(t)

    # A simple expression may be a sequence of targets. Split on any run of
    # whitespace so that repeated spaces (or an empty specification) do not
    # produce empty entries that would be reported as unknown targets.
    for tgt in t.split():
        if re_platform.match(tgt):
            targets.append(convert_target_regex(tgt))
        elif tgt in platforms:
            targets.append(convert_target_regex(platforms[tgt]))
        elif tgt in ['newlib']:
            # FIXME: These may need something sane to be done.
            pass
        else:
            warning('Unknown target to disable: {}', tgt)
    return targets
# Parse the target specification, if possible, and record the result in the
# lists that are passed in.
#
# This is not guaranteed to parse all target specifications. The arbitrarily
# complex expressions that seem to be possible are of no interest here, so only
# "simple" expressions are handled. Enabled targets are appended to
# enabled_on and disabled targets to disabled_on. Some target expressions
# translate to compiler/linker flags instead; those are appended to options.
def parse_targets_into(
    t: str, enabled_on: list[str], disabled_on: list[str], options: list[str]
) -> None:
    # An expression can be wrapped with braces. While this seems to be necessary
    # for complex expressions, it can be used with simple expressions as well.
    spec = strip_braces(t.strip())

    # Anything that uses a logical operator is not a simple expression.
    if '&&' in spec or '||' in spec:
        warning('Ignoring target specification: {}', spec)
        return

    if not spec.startswith('!'):
        targets, opts = parse_enabled_targets(spec)
        enabled_on.extend(targets)
        options.extend(opts)
        return

    # The only "complex" expression that is handled is a "top-level" negation
    # which excludes certain targets.
    disabled_on.extend(parse_disabled_targets(spec[1:].strip()))
# Collect the names of all files that the annotations in the given lines
# declare as additional sources or auxiliary modules.
def _find_dependents(lines: list[str]) -> set[str]:
    found: set[str] = set()
    for l in lines:
        mout: list[re.Match] = []
        if try_match(re_sources, l, mout) or \
           try_match(re_aux_modules, l, mout):
            found.update(qsplit(mout[0][1]))
    return found


# Build the test whose main file is f from the DejaGNU annotations found in
# the lines of that file.
def _build_test(f: str, lines: list[str]) -> Test:
    kind: str | None = None
    sources: list[str] = [os.path.basename(f)]
    options: list[str] = []
    enabled_on: list[str] = []
    disabled_on: list[str] = []
    xfail: bool = False

    # The annotations that determine the test kind, in the order in which they
    # are tried. Only some of these patterns define a 'target' group.
    kind_patterns = (
        (re_assemble, 'assemble'),
        (re_preprocess, 'preprocess'),
        (re_compile, 'compile'),
        (re_link, 'link'),
        (re_run, 'run'),
    )

    for l in lines:
        mout: list[re.Match] = []
        for regex, k in kind_patterns:
            if try_match(regex, l, mout):
                kind = k
                break
        # mout is non-empty here only if one of the kind patterns matched.
        if mout:
            # TODO: Does lto-run need to be handled differently?
            target = mout[0].groupdict().get('target')
            if target:
                parse_targets_into(target, enabled_on, disabled_on, options)
        elif try_match(re_shouldfail, l, mout) or \
             try_match(re_error, l, mout):
            xfail = True
        elif try_match(re_sources, l, mout) or \
             try_match(re_aux_modules, l, mout):
            sources.extend(qsplit(mout[0][1]))
        elif try_match(re_opts, l, mout) or \
             try_match(re_addnl_opts, l, mout) or \
             try_match(re_ld_opts, l, mout):
            m = mout[0]
            # FIXME: This is not correct.
            # If the options have a target annotation, those options should
            # only be added on a specific target. We currently cannot handle
            # this case in the static configuration, so just ignore those
            # options entirely for now.
            if not m['target']:
                options.extend(qsplit(m[1]))
        elif try_match(re_lto_opts, l, mout):
            m = mout[0]
            # FIXME: There are two sets of options in some files. It is
            # possible that an arbitrary number of these is allowed, but I
            # don't know exactly what it is for, so for now, just use the
            # first set.
            opts = qsplit(re_btxt.findall(m[1])[0])
            # FIXME: This is not correct.
            # If the options have a target annotation, those options should
            # only be added on a specific target. We currently cannot handle
            # this case in the static configuration, so just ignore those
            # options entirely for now.
            if not m['target']:
                options.extend(opts)

    # If the kind is missing, assume that it is a compile test except
    # for torture/execute where it is an execute test.
    if not kind:
        anc1 = os.path.basename(get_ancestor(f, 1))
        anc2 = os.path.basename(get_ancestor(f, 2))
        if anc2 == 'torture' and anc1 == 'execute':
            kind = 'run'
        else:
            kind = 'compile'

    return Test(kind, sources, options, enabled_on, disabled_on, xfail)


# Collect the tests in a given directory. Only the Fortran files directly in
# the directory are considered. Every file that is not a dependency of some
# other file becomes the main file of one test. The tests are returned sorted
# by kind and then by the case-insensitive name of the main file. The script
# is terminated with an error if a Fortran file ends up in no test at all.
def collect_tests(d: str) -> list[Test]:
    tests: list[Test] = []
    with os.scandir(d) as entries:
        files: list[str] = [
            e.path for e in entries if e.is_file() and re_fortran.match(e.name)
        ]
    message('Found {} Fortran files', len(files))

    if not files:
        return tests

    # Some files cannot be read because they are invalid UTF-16. Just handle
    # those as a special case here. This is a really ugly way of doing things
    # but this script is only intended for occasional use, so I am not too
    # bothered about this.
    remove = [f for f in files if f.endswith('regression/bom_error.f90')]
    for f in remove:
        tests.append(
            Test('compile', [os.path.basename(f)], [], [], [], True)
        )
    files = [f for f in files if f not in remove]
    if remove:
        message(
            'Filter known problematic files: {}\n {}',
            len(remove),
            ' \n'.join([os.path.basename(f) for f in remove])
        )

    # Read every file exactly once. The lines are needed twice below: first to
    # find the dependencies and then to parse the annotations of each test.
    lines_by_file = {f: get_lines(f) for f in files}

    # Find all the files that are dependencies of some file that is the
    # main file in a test.
    dependents: set[str] = set()
    for lines in lines_by_file.values():
        dependents |= _find_dependents(lines)
    message('Found {} dependent files', len(dependents))

    for f in files:
        if os.path.basename(f) in dependents:
            continue
        tests.append(_build_test(f, lines_by_file[f]))

    # Count the fortran files in the tests. Eventually, we want to ensure
    # that all the fortran files are accounted for.
    accounted = {
        s for test in tests for s in test.sources if re_fortran.match(s)
    }
    orphans = {os.path.basename(f) for f in files} - accounted
    if orphans:
        # Sort the names so that the report is the same from run to run.
        error(
            '{} orphan files found\n {}',
            len(orphans),
            ' \n'.join(sorted(orphans))
        )

    order = {'preprocess': 0, 'assemble': 1, 'compile': 2, 'link': 3, 'run': 4}
    tests.sort(key = lambda t: (order[t.kind], t.sources[0].lower()))

    return tests
# Parse tests from the given test configuration file. Blank lines and lines
# starting with a # are skipped. Every other line must consist of exactly six
# fields separated by semicolons; the script is terminated with an error
# otherwise. Fields holding lists are split on whitespace, so an empty field
# produces an empty list.
def parse_tests(filename: str) -> list[Test]:
    tests = []
    with open(filename, 'r') as f:
        for lno, l in enumerate(f, start = 1):
            line = l.strip()
            # Lines starting with a # are comment lines.
            if not line or line.startswith('#'):
                continue

            # The format of each non-comment line is specified at the start of
            # this file. Split the stripped line so that the line terminator
            # does not end up in the last field.
            elems = line.split(';')
            if len(elems) != 6:
                error('{}:{}: Unexpected number of elements', filename, lno)
            if elems[2] not in ['', 'xfail']:
                error(
                    '{}:{}: Expected error field must be xfail or empty',
                    filename,
                    lno
                )

            kind = elems[0]
            sources = elems[1].split()
            xfail = elems[2] == 'xfail'
            options = elems[3].split()
            enabled_on = elems[4].split()
            disabled_on = elems[5].split()

            tests.append(
                Test(kind, sources, options, enabled_on, disabled_on, xfail)
            )
    return tests
# Create the command-line argument parser for this script and return it. The
# only option is -b/--backup, which is off unless given.
def get_argument_parser():
    description = (
        'Update the static test configuration files in the gfortran tests '
        'within the LLVM test suite. This will update the configuration files '
        'within the repository in which this script is contained.'
    )
    backup_help = (
        'create a backup file for each test configuration file before it is '
        'updated'
    )

    parser = argparse.ArgumentParser(description = description)
    parser.add_argument(
        '-b',
        '--backup',
        action = 'store_true',
        default = False,
        help = backup_help
    )
    return parser
# The comment block that is written at the top of every generated test
# configuration file. It documents the format of the file.
_CONFIG_HEADER = """# This file was generated by update-test-config.py
#
# Each line in this file corresponds to a single test. The format of each line
# is:
#
# <kind>;<sources>;<xfail>;<options>;<enabled-on>;<disabled-on>
#
# where
#
# <kind> is one of 'preprocess', 'assemble', 'compile', 'link' or
# 'run'.
#
# <sources> is a space separated list of sources files that comprise
# the test. The first file is the \"main\" file. The rest
# of the files must be specified in program compilation
# order.
#
# <xfail> if present, must be 'xfail' which indicates that the test
# is expected to trigger a compile-time or runtime error.
#
# <options> is a space separated list of options to be passed to the
# compiler when building the test.
#
# <enabled-on> is a space-separated list of targets on which the test is
# enabled. Each element of the list will be a regular
# expression that is expected to match an LLVM target triple.
# If no targets are provided, the test is enabled on all
# targets.
#
# <disabled-on> is a space-separated list of targets on which the test is
# disabled. Each element of the list will be a regular
# expression that is expected to match an LLVM target triple.
#
"""


# Regenerate the tests.cmake file in every test directory below the
# Fortran/gfortran directory of the repository that contains this script, and
# print per-directory and overall statistics. Returns the exit status of the
# script, which is always 0.
def main() -> int:
    ap = get_argument_parser()
    args = ap.parse_args()

    # The repository root is the 4th level ancestor of this script.
    root = get_ancestor(os.path.realpath(__file__), 4)
    gfortran = os.path.join(root, 'Fortran', 'gfortran')

    dirs = get_subdirs(gfortran)
    kinds = ['preprocess', 'assemble', 'compile', 'link', 'run']
    stats = {
        'total': 0,
        'preprocess': 0,
        'assemble': 0,
        'compile': 0,
        'link': 0,
        'run': 0
    }
    for d in dirs:
        printf('{}', d)
        tests = collect_tests(d)
        if not tests:
            continue

        existing = []
        config_file = os.path.join(d, 'tests.cmake')
        if os.path.exists(config_file):
            existing = parse_tests(config_file)
            # Only announce a backup when one is actually being made.
            if args.backup:
                message('Backing up test configuration')
                shutil.move(config_file, config_file + '.bak')
        else:
            message('Test configuration not found')

        message('Writing test configuration')
        with open(config_file, 'w') as f:
            f.write(_CONFIG_HEADER)
            f.write('\n'.join([str(t) for t in tests]))

        # Count the tests of each kind once and use the counts for both the
        # running totals and the per-directory report.
        counts = {
            k: count_if(tests, lambda t, k = k: t.kind == k) for k in kinds
        }
        stats['total'] += len(tests)
        for k in kinds:
            stats[k] += counts[k]

        printf('{:16}{}', 'Found tests', len(tests))
        for k in kinds:
            printf(' {:14}{}', k, counts[k])
        printf('{:16}{}', 'Existing tests', len(existing))
        printf('')

    printf('\nTEST SUITE\n')
    printf('{:16}{}', 'Found tests', stats['total'])
    for k in kinds:
        printf(' {:14}{}', k, stats[k])

    return 0
if __name__ == '__main__':
    # Raise SystemExit directly instead of calling exit(). The exit() helper is
    # injected by the site module for interactive use and is not available
    # when the interpreter is started without it (e.g. python -S).
    raise SystemExit(main())