blob: 75623a59ad4818b1b4faf231015b8811f8abc722 [file] [log] [blame]
# For manual usage, not as a part of lit tests. Used for generating the following tests:
# cmpxchg-sm30.ll, cmpxchg-sm70.ll, cmpxchg-sm90.ll
from string import Template
from itertools import product
cmpxchg_func = Template(
"""define i$size @${success}_${failure}_i${size}_${addrspace}_${ptx_scope}(ptr${addrspace_cast} %addr, i$size %cmp, i$size %new) {
%pairold = cmpxchg ptr${addrspace_cast} %addr, i$size %cmp, i$size %new syncscope(\"${llvm_scope}\") $success $failure
ret i$size %new
}
"""
)
cmpxchg_func_no_scope = Template(
"""define i$size @${success}_${failure}_i${size}_${addrspace}(ptr${addrspace_cast} %addr, i$size %cmp, i$size %new) {
%pairold = cmpxchg ptr${addrspace_cast} %addr, i$size %cmp, i$size %new $success $failure
ret i$size %new
}
"""
)
run_statement = Template(
"""; RUN: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verify -arch=sm_${sm} %}
"""
)
def get_addrspace_cast(addrspace):
if addrspace == 0:
return ""
else:
return " addrspace({})".format(str(addrspace))
TESTS = [(60, 50), (70, 63), (90, 87)]
LLVM_SCOPES = ["", "block", "cluster", "device"]
SCOPE_LLVM_TO_PTX = {"": "sys", "block": "cta", "cluster": "cluster", "device": "gpu"}
SUCCESS_ORDERINGS = ["monotonic", "acquire", "release", "acq_rel", "seq_cst"]
FAILURE_ORDERINGS = ["monotonic", "acquire", "seq_cst"]
SIZES = [8, 16, 32, 64]
ADDRSPACES = [0, 1, 3]
ADDRSPACE_NUM_TO_ADDRSPACE = {0: "generic", 1: "global", 3: "shared"}
if __name__ == "__main__":
for sm, ptx in TESTS:
with open("cmpxchg-sm{}.ll".format(str(sm)), "w") as fp:
print(run_statement.substitute(sm=sm, ptx=ptx), file=fp)
# Our test space is: SIZES X SUCCESS_ORDERINGS X FAILURE_ORDERINGS X ADDRSPACES X LLVM_SCOPES
# This is very large, so we instead test 3 slices.
# First slice: are all orderings correctly supported, with and without emulation loops?
# set addrspace to global, scope to cta, generate all possible orderings, for all operation sizes
addrspace, llvm_scope = 1, "block"
for size, success, failure in product(
SIZES, SUCCESS_ORDERINGS, FAILURE_ORDERINGS
):
print(
cmpxchg_func.substitute(
success=success,
failure=failure,
size=size,
addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
addrspace_cast=get_addrspace_cast(addrspace),
llvm_scope=llvm_scope,
ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
),
file=fp,
)
# Second slice: Are all scopes correctlly supported, with and without emulation loops?
# fix addrspace, ordering, generate all possible scopes, for operation sizes i8, i32
addrspace, success, failure = 1, "acq_rel", "acquire"
for size in [8, 32]:
print(
cmpxchg_func_no_scope.substitute(
success=success,
failure=failure,
size=size,
addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
addrspace_cast=get_addrspace_cast(addrspace),
),
file=fp,
)
for llvm_scope in LLVM_SCOPES:
if sm < 90 and llvm_scope == "cluster":
continue
if llvm_scope == "block":
# skip (acq_rel, acquire, global, cta)
continue
print(
cmpxchg_func.substitute(
success=success,
failure=failure,
size=size,
addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
addrspace_cast=get_addrspace_cast(addrspace),
llvm_scope=llvm_scope,
ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
),
file=fp,
)
# Third slice: Are all address spaces correctly supported?
# fix ordering, scope, generate all possible address spaces, for operation sizes i8, i32
success, failure, llvm_scope = "acq_rel", "acquire", "block"
for size, addrspace in product([8, 32], ADDRSPACES):
if addrspace == 1:
# skip (acq_rel, acquire, global, cta)
continue
print(
cmpxchg_func.substitute(
success=success,
failure=failure,
size=size,
addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
addrspace_cast=get_addrspace_cast(addrspace),
llvm_scope=llvm_scope,
ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
),
file=fp,
)