| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -mtriple=amdgcn -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck %s |
| |
| ; Use readfirstlane to demonstrate when InstCombine deems an input to |
| ; be trivially uniform. |
| |
| ; Constants are trivially uniform. |
| ; readfirstlane of the literal 7 is expected to fold away completely, leaving |
| ; a plain `ret i32 7`. |
| define i32 @test_constant() { |
| ; CHECK-LABEL: define i32 @test_constant( |
| ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: ret i32 7 |
| ; |
| %r = call i32 @llvm.amdgcn.readfirstlane(i32 7) |
| ret i32 %r |
| } |
| |
| ; The result of an AlwaysUniform intrinsic is trivially uniform. |
| ; The inner readfirstlane on the plain argument %x is expected to remain; only |
| ; the outer readfirstlane, whose operand %y is itself a readfirstlane result, |
| ; is expected to be removed so that %y is returned directly. |
| define i32 @test_intrinsic(i32 %x) { |
| ; CHECK-LABEL: define i32 @test_intrinsic( |
| ; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[Y:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]]) |
| ; CHECK-NEXT: ret i32 [[Y]] |
| ; |
| %y = call i32 @llvm.amdgcn.readfirstlane(i32 %x) |
| %r = call i32 @llvm.amdgcn.readfirstlane(i32 %y) |
| ret i32 %r |
| } |
| |
| ; In compute kernels, all arguments are trivially uniform. |
| |
| ; amdgpu_kernel, i32 argument without inreg: the readfirstlane is expected to |
| ; be removed so that %x is stored to %out directly. |
| define amdgpu_kernel void @test_compute_i32(ptr %out, i32 %x) { |
| ; CHECK-LABEL: define amdgpu_kernel void @test_compute_i32( |
| ; CHECK-SAME: ptr [[OUT:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: store i32 [[X]], ptr [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x) |
| store i32 %r, ptr %out |
| ret void |
| } |
| |
| ; amdgpu_kernel, i1 argument without inreg: the readfirstlane is expected to |
| ; be removed so that %x is stored to %out directly. |
| define amdgpu_kernel void @test_compute_i1(ptr %out, i1 %x) { |
| ; CHECK-LABEL: define amdgpu_kernel void @test_compute_i1( |
| ; CHECK-SAME: ptr [[OUT:%.*]], i1 [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: store i1 [[X]], ptr [[OUT]], align 1 |
| ; CHECK-NEXT: ret void |
| ; |
| %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x) |
| store i1 %r, ptr %out |
| ret void |
| } |
| |
| ; amdgpu_kernel, <32 x i1> argument without inreg: the readfirstlane is |
| ; expected to be removed so that %x is stored to %out directly. |
| define amdgpu_kernel void @test_compute_v32i1(ptr %out, <32 x i1> %x) { |
| ; CHECK-LABEL: define amdgpu_kernel void @test_compute_v32i1( |
| ; CHECK-SAME: ptr [[OUT:%.*]], <32 x i1> [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: store <32 x i1> [[X]], ptr [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x) |
| store <32 x i1> %r, ptr %out |
| ret void |
| } |
| |
| ; In graphics shaders, inreg arguments are trivially uniform. |
| |
| ; amdgpu_ps, inreg i32 argument: the readfirstlane is expected to fold to %x, |
| ; which is then returned directly. |
| define amdgpu_ps i32 @test_graphics_i32(i32 inreg %x) { |
| ; CHECK-LABEL: define amdgpu_ps i32 @test_graphics_i32( |
| ; CHECK-SAME: i32 inreg [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: ret i32 [[X]] |
| ; |
| %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x) |
| ret i32 %r |
| } |
| |
| ; amdgpu_ps, inreg i1 argument: the readfirstlane is expected to fold to %x, |
| ; which is then returned directly. |
| define amdgpu_ps i1 @test_graphics_i1(i1 inreg %x) { |
| ; CHECK-LABEL: define amdgpu_ps i1 @test_graphics_i1( |
| ; CHECK-SAME: i1 inreg [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: ret i1 [[X]] |
| ; |
| %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x) |
| ret i1 %r |
| } |
| |
| ; amdgpu_ps, inreg <32 x i1> argument: the readfirstlane is expected to fold |
| ; to %x, which is then returned directly. |
| define amdgpu_ps <32 x i1> @test_graphics_v32i1(<32 x i1> inreg %x) { |
| ; CHECK-LABEL: define amdgpu_ps <32 x i1> @test_graphics_v32i1( |
| ; CHECK-SAME: <32 x i1> inreg [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: ret <32 x i1> [[X]] |
| ; |
| %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x) |
| ret <32 x i1> %r |
| } |
| |
| ; In graphics shaders, non-inreg arguments are not trivially uniform. |
| |
| ; Negative test: amdgpu_ps, i32 argument without inreg. The readfirstlane call |
| ; is expected to be kept. The input uses the unsuffixed intrinsic name; the |
| ; checks expect the type-suffixed name (.i32) in the output. |
| define amdgpu_ps i32 @test_graphics_i32_negative(i32 %x) { |
| ; CHECK-LABEL: define amdgpu_ps i32 @test_graphics_i32_negative( |
| ; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]]) |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x) |
| ret i32 %r |
| } |
| |
| ; Negative test: amdgpu_ps, i1 argument without inreg. The readfirstlane call |
| ; is expected to be kept. The input uses the unsuffixed intrinsic name; the |
| ; checks expect the type-suffixed name (.i1) in the output. |
| define amdgpu_ps i1 @test_graphics_i1_negative(i1 %x) { |
| ; CHECK-LABEL: define amdgpu_ps i1 @test_graphics_i1_negative( |
| ; CHECK-SAME: i1 [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.amdgcn.readfirstlane.i1(i1 [[X]]) |
| ; CHECK-NEXT: ret i1 [[R]] |
| ; |
| %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x) |
| ret i1 %r |
| } |
| |
| ; Negative test: amdgpu_ps, <32 x i1> argument without inreg. The readfirstlane |
| ; call is expected to be kept. The input uses the unsuffixed intrinsic name; |
| ; the checks expect the type-suffixed name (.v32i1) in the output. |
| define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative(<32 x i1> %x) { |
| ; CHECK-LABEL: define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative( |
| ; CHECK-SAME: <32 x i1> [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[R:%.*]] = call <32 x i1> @llvm.amdgcn.readfirstlane.v32i1(<32 x i1> [[X]]) |
| ; CHECK-NEXT: ret <32 x i1> [[R]] |
| ; |
| %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x) |
| ret <32 x i1> %r |
| } |
| |
| ; Test i1 arguments in non-entry functions. |
| |
| ; amdgpu_gfx, inreg i1 argument: the readfirstlane is expected to fold to %x, |
| ; which is then returned directly. |
| define amdgpu_gfx i1 @test_callable_i1(i1 inreg %x) { |
| ; CHECK-LABEL: define amdgpu_gfx i1 @test_callable_i1( |
| ; CHECK-SAME: i1 inreg [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: ret i1 [[X]] |
| ; |
| %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x) |
| ret i1 %r |
| } |
| |
| ; amdgpu_gfx, inreg <32 x i1> argument: the readfirstlane is expected to fold |
| ; to %x, which is then returned directly. |
| define amdgpu_gfx <32 x i1> @test_callable_v32i1(<32 x i1> inreg %x) { |
| ; CHECK-LABEL: define amdgpu_gfx <32 x i1> @test_callable_v32i1( |
| ; CHECK-SAME: <32 x i1> inreg [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: ret <32 x i1> [[X]] |
| ; |
| %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x) |
| ret <32 x i1> %r |
| } |
| |
| ; Negative test: amdgpu_gfx, i1 argument without inreg. The readfirstlane call |
| ; is expected to be kept. The input uses the unsuffixed intrinsic name; the |
| ; checks expect the type-suffixed name (.i1) in the output. |
| define amdgpu_gfx i1 @test_callable_i1_negative(i1 %x) { |
| ; CHECK-LABEL: define amdgpu_gfx i1 @test_callable_i1_negative( |
| ; CHECK-SAME: i1 [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.amdgcn.readfirstlane.i1(i1 [[X]]) |
| ; CHECK-NEXT: ret i1 [[R]] |
| ; |
| %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x) |
| ret i1 %r |
| } |
| |
| ; Negative test: amdgpu_gfx, <32 x i1> argument without inreg. The |
| ; readfirstlane call is expected to be kept. The input uses the unsuffixed |
| ; intrinsic name; the checks expect the type-suffixed name (.v32i1) in the |
| ; output. |
| define amdgpu_gfx <32 x i1> @test_callable_v32i1_negative(<32 x i1> %x) { |
| ; CHECK-LABEL: define amdgpu_gfx <32 x i1> @test_callable_v32i1_negative( |
| ; CHECK-SAME: <32 x i1> [[X:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[R:%.*]] = call <32 x i1> @llvm.amdgcn.readfirstlane.v32i1(<32 x i1> [[X]]) |
| ; CHECK-NEXT: ret <32 x i1> [[R]] |
| ; |
| %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x) |
| ret <32 x i1> %r |
| } |