| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py |
| // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s |
| |
| // REQUIRES: amdgpu-registered-target |
| |
| typedef unsigned int uint; /* shorthand for the unsigned parameters of the permlane tests in this file */ |
| |
| // CHECK-LABEL: @test_s_sleep_var( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: store i32 [[D:%.*]], ptr addrspace(5) [[D_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[D_ADDR]], align 4 |
| // CHECK-NEXT: call void @llvm.amdgcn.s.sleep.var(i32 [[TMP0]]) |
| // CHECK-NEXT: call void @llvm.amdgcn.s.sleep.var(i32 15) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_sleep_var(int d) /* exercises __builtin_amdgcn_s_sleep_var with a runtime and a literal argument */ |
| { |
| __builtin_amdgcn_s_sleep_var(d); /* runtime value: the expected IR reloads d from its stack slot before the call */ |
| __builtin_amdgcn_s_sleep_var(15); /* literal: the expected IR passes i32 15 directly */ |
| } |
| |
| // CHECK-LABEL: @test_permlane16_var( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |
| // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 |
| // CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4 |
| // CHECK-NEXT: store i32 [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false) |
| // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 |
| // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| void test_permlane16_var(global uint* out, uint a, uint b, uint c) { /* stores the result of __builtin_amdgcn_permlane16_var through out */ |
| *out = __builtin_amdgcn_permlane16_var(a, b, c, 0, 0); /* the two trailing 0 literals are expected to appear as i1 false operands of llvm.amdgcn.permlane16.var */ |
| } |
| |
| // CHECK-LABEL: @test_permlanex16_var( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |
| // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 |
| // CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4 |
| // CHECK-NEXT: store i32 [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlanex16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false) |
| // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 |
| // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| void test_permlanex16_var(global uint* out, uint a, uint b, uint c) { /* stores the result of __builtin_amdgcn_permlanex16_var through out */ |
| *out = __builtin_amdgcn_permlanex16_var(a, b, c, 0, 0); /* the two trailing 0 literals are expected to appear as i1 false operands of llvm.amdgcn.permlanex16.var */ |
| } |
| |
| // CHECK-LABEL: @test_s_barrier_signal( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1) |
| // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_barrier_signal() /* pairs a barrier signal with a barrier wait, both given the literal -1 */ |
| { |
| __builtin_amdgcn_s_barrier_signal(-1); /* expected IR operand: i32 -1 */ |
| __builtin_amdgcn_s_barrier_wait(-1); /* expected IR operand: i16 -1, narrower than the i32 used by the signal call */ |
| } |
| |
| // CHECK-LABEL: @test_s_barrier_signal_var( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(i32 [[TMP0]]) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_barrier_signal_var(int a) /* exercises the _var form of the barrier signal builtin with a runtime argument */ |
| { |
| __builtin_amdgcn_s_barrier_signal_var(a); /* expected IR: llvm.amdgcn.s.barrier.signal.var called with the value reloaded from a's stack slot */ |
| } |
| |
| // CHECK-LABEL: @test_s_barrier_signal_isfirst( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: store ptr [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 1) |
| // CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] |
| // CHECK: if.then: |
| // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspace(5) [[B_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: br label [[IF_END:%.*]] |
| // CHECK: if.else: |
| // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[C_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: br label [[IF_END]] |
| // CHECK: if.end: |
| // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_barrier_signal_isfirst(int* a, int* b, int *c) /* branches on the builtin's boolean result, then waits on the barrier */ |
| { |
| if(__builtin_amdgcn_s_barrier_signal_isfirst(1)) /* expected IR: the i1 result feeds the conditional branch directly */ |
| a = b; /* only overwrites the by-value parameter a; gives the if.then block a body */ |
| else |
| a = c; /* likewise for the if.else block */ |
| |
| __builtin_amdgcn_s_barrier_wait(1); /* expected in if.end, after both arms rejoin; operand is i16 1 */ |
| } |
| |
| // CHECK-LABEL: @test_s_barrier_isfirst_var( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: store ptr [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 8 |
| // CHECK-NEXT: store i32 [[D:%.*]], ptr addrspace(5) [[D_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[D_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32 [[TMP0]]) |
| // CHECK-NEXT: br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] |
| // CHECK: if.then: |
| // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[B_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: br label [[IF_END:%.*]] |
| // CHECK: if.else: |
| // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr addrspace(5) [[C_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[TMP3]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: br label [[IF_END]] |
| // CHECK: if.end: |
| // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_barrier_isfirst_var(int* a, int* b, int *c, int d) /* same shape as the isfirst test, but the builtin takes the runtime value d */ |
| { |
| if ( __builtin_amdgcn_s_barrier_signal_isfirst_var(d)) /* expected IR: d is reloaded, passed as i32, and the i1 result drives the branch */ |
| a = b; /* only overwrites the by-value parameter a; gives the if.then block a body */ |
| else |
| a = c; /* likewise for the if.else block */ |
| |
| __builtin_amdgcn_s_barrier_wait(1); /* expected in if.end; operand is i16 1 */ |
| |
| } |
| |
| // CHECK-LABEL: @test_s_barrier_init( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(i32 1, i32 [[TMP0]]) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_barrier_init(int a) /* mixes a literal first argument with a runtime second argument */ |
| { |
| __builtin_amdgcn_s_barrier_init(1, a); /* expected IR: llvm.amdgcn.s.barrier.init with i32 1 followed by the reloaded value of a */ |
| } |
| |
| // CHECK-LABEL: @test_s_barrier_join( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(i32 1) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_barrier_join() /* exercises __builtin_amdgcn_s_barrier_join with a literal argument */ |
| { |
| __builtin_amdgcn_s_barrier_join(1); /* expected IR: llvm.amdgcn.s.barrier.join(i32 1) */ |
| } |
| |
| // Covers __builtin_amdgcn_s_wakeup_barrier (a different builtin from the |
| // join builtin exercised by test_s_barrier_join) with a literal argument. |
| // The call is expected to lower to llvm.amdgcn.s.wakeup.barrier. |
| // CHECK-LABEL: @test_s_wakeup_barrier( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(i32 1) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_wakeup_barrier() |
| { |
| __builtin_amdgcn_s_wakeup_barrier(1); |
| } |
| |
| // CHECK-LABEL: @test_s_barrier_leave( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: store ptr [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.leave() |
| // CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] |
| // CHECK: if.then: |
| // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspace(5) [[B_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: br label [[IF_END:%.*]] |
| // CHECK: if.else: |
| // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[C_ADDR]], align 8 |
| // CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[A_ADDR]], align 8 |
| // CHECK-NEXT: br label [[IF_END]] |
| // CHECK: if.end: |
| // CHECK-NEXT: ret void |
| // |
| void test_s_barrier_leave(int* a, int* b, int *c) /* branches on the boolean result of the argument-less leave builtin */ |
| { |
| if (__builtin_amdgcn_s_barrier_leave()) /* expected IR: i1 result of llvm.amdgcn.s.barrier.leave() feeds the branch directly */ |
| a = b; /* only overwrites the by-value parameter a; gives the if.then block a body */ |
| else |
| a = c; /* likewise for the if.else block */ |
| } |
| |
| // CHECK-LABEL: @test_s_get_barrier_state( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 |
| // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.s.get.barrier.state(i32 [[TMP0]]) |
| // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[STATE]], align 4 |
| // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[STATE]], align 4 |
| // CHECK-NEXT: ret i32 [[TMP2]] |
| // |
| unsigned test_s_get_barrier_state(int a) /* returns the builtin's i32 result by way of a named local */ |
| { |
| unsigned State = __builtin_amdgcn_s_get_barrier_state(a); /* the named local is why the expected IR has a second alloca and a store/reload pair */ |
| return State; |
| } |
| |
| // CHECK-LABEL: @test_s_ttracedata( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: call void @llvm.amdgcn.s.ttracedata(i32 1) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_ttracedata() /* exercises __builtin_amdgcn_s_ttracedata with a literal argument */ |
| { |
| __builtin_amdgcn_s_ttracedata(1); /* expected IR operand: i32 1 */ |
| } |
| |
| // CHECK-LABEL: @test_s_ttracedata_imm( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: call void @llvm.amdgcn.s.ttracedata.imm(i16 1) |
| // CHECK-NEXT: ret void |
| // |
| void test_s_ttracedata_imm() /* exercises the _imm form of the ttracedata builtin with a literal argument */ |
| { |
| __builtin_amdgcn_s_ttracedata_imm(1); /* expected IR operand: i16 1, unlike the i32 taken by the non-imm form */ |
| } |
| |
| |