| ; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s |
| target triple = "nvptx64-nvidia-cuda" |
| declare void @llvm.nvvm.barrier0() |
| ; The syncthreads barrier (the call to @llvm.nvvm.barrier0) shouldn't be duplicated. |
| ; CHECK: .func call_syncthreads |
| define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind { |
| br i1 %cond, label %L1, label %L2 |
| br i1 %cond2, label %Ret, label %L1 |
| call void @llvm.nvvm.barrier0() |
| ; Check that the same control flow as call_syncthreads really does trigger tail duplication. |
| define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind { |
| br i1 %cond, label %L1, label %L2 |
| br i1 %cond2, label %Ret, label %L1 |