|  | ; RUN: opt -S -passes=jump-threading,verify < %s | FileCheck %s | 
|  |  | 
|  | ; Target configuration: the triple selects 64-bit NVPTX (CUDA). | 
|  | target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" | 
|  | target triple = "nvptx64-nvidia-cuda" | 
|  |  | 
|  | ; Comdat groups (selection kind "any") used by the two functions defined | 
|  | ; in this file, each of which is marked `comdat`. | 
|  | $wrapped_tid = comdat any | 
|  |  | 
|  | $foo = comdat any | 
|  |  | 
|  | ; Thin wrapper: calls @tid and returns its result unchanged. | 
|  | ; NOTE(review): attribute group #0 has no definition in the visible part of | 
|  | ; this file -- confirm whether that is intentional. | 
|  | define i32 @wrapped_tid() #0 comdat align 32 { | 
|  | %1 = call i32 @tid() | 
|  | ret i32 %1 | 
|  | } | 
|  |  | 
|  | ; Barrier intrinsic called from @foo; declared with attribute group #1, | 
|  | ; which this file defines as { convergent }. | 
|  | declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32) #1 | 
|  |  | 
|  | ; We had a bug where we duplicated basic blocks containing calls to convergent | 
|  | ; functions like @llvm.nvvm.barrier.cta.sync.aligned.all below.  Verify that | 
|  | ; we don't do that. | 
|  | define void @foo() local_unnamed_addr #2 comdat align 32 { | 
|  | ; CHECK-LABEL: @foo | 
|  | ; Entry block: %1 and %2 are computed once here; every conditional branch in | 
|  | ; the loop below tests a value derived from them. | 
|  | %1 = call i32 @tid() | 
|  | %2 = urem i32 %1, 7 | 
|  | br label %3 | 
|  |  | 
|  | ; Loop header (re-entered from block %11): call @bar only when %1 is zero. | 
|  | 3: | 
|  | %4 = icmp eq i32 %1, 0 | 
|  | br i1 %4, label %5, label %6 | 
|  |  | 
|  | ; Reached only when %1 == 0. | 
|  | 5: | 
|  | call void @bar() | 
|  | br label %6 | 
|  |  | 
|  | ; Join block holding the barrier call. Arriving from block %5 implies | 
|  | ; %1 == 0 and hence %2 == 0, so the branch on %7 is decidable on that edge; | 
|  | ; threading it would require a second copy of this block and of the barrier. | 
|  | ; The two directives below require the barrier call to appear exactly once | 
|  | ; in the output: the first matches it, the second rejects any later copy. | 
|  | 6: | 
|  | ; CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0) | 
|  | ; CHECK-NOT: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0) | 
|  | call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0) | 
|  | %7 = icmp eq i32 %2, 0 | 
|  | br i1 %7, label %11, label %8 | 
|  |  | 
|  | ; Reached only when %2 != 0: trap if %1 is (unsigned) below 49, otherwise | 
|  | ; continue to the loop latch. | 
|  | 8: | 
|  | %9 = icmp ult i32 %1, 49 | 
|  | br i1 %9, label %10, label %11 | 
|  |  | 
|  | ; Trap path; control does not continue past this block. | 
|  | 10: | 
|  | call void @llvm.trap() | 
|  | unreachable | 
|  |  | 
|  | ; Loop latch: unconditional back edge to the header. | 
|  | 11: | 
|  | br label %3 | 
|  | } | 
|  |  | 
|  | ; External callee used by @wrapped_tid and @foo; its result feeds the branch | 
|  | ; conditions in @foo. | 
|  | declare i32 @tid() #2 | 
|  |  | 
|  | ; External callee invoked on the %1 == 0 path of @foo. | 
|  | declare void @bar() | 
|  |  | 
|  | ; Trap intrinsic, declared with attribute group #3 ({ noreturn }). | 
|  | declare void @llvm.trap() #3 | 
|  |  | 
|  | ; Attribute groups referenced above. | 
|  | ; NOTE(review): #4 repeats #1 and has no user in the visible part of this | 
|  | ; file, while #0 (referenced by @wrapped_tid) has no definition here -- | 
|  | ; confirm both are intentional. | 
|  | attributes #1 = { convergent } | 
|  | attributes #2 = { readnone } | 
|  | attributes #3 = { noreturn } | 
|  | attributes #4 = { convergent } | 