| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE32 %s |
| ; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE64 %s |
| |
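| ; The intrinsic result below is known to lie in [0, 32] (see the range attribute in |
| ; the expected output), so (%v << 1) + 32 cannot wrap in i32. The zext of the add can |
| ; therefore be split, and the constant part is canonicalized into a separate gep of |
| ; 32 floats (128 bytes). |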
| define amdgpu_ps <2 x float> @turn_add_into_gep(ptr addrspace(1) inreg %sbase) { |
| ; CHECK-LABEL: define amdgpu_ps <2 x float> @turn_add_into_gep( |
| ; CHECK-SAME: ptr addrspace(1) inreg [[SBASE:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[V:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| ; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[V]], 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[MUL]] to i64 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(1) [[SBASE]], i64 [[TMP1]] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP2]], i64 128 |
| ; CHECK-NEXT: [[LOAD:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP]], align 8 |
| ; CHECK-NEXT: ret <2 x float> [[LOAD]] |
| ; |
| ; The expected output annotates this call with range(i32 0, 33), i.e. %v is at most 32. |
| %v = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| ; With %v <= 32, %mul <= 64 and %add <= 96, so neither operation wraps in i32 |
| ; (the expected shl gains nuw nsw, and the add disappears from the output). |
| %mul = shl i32 %v, 1 |
| %add = add i32 %mul, 32 |
| %zext.offset = zext i32 %add to i64 |
| ; The index is in units of float (4 bytes). The expected output splits it into a |
| ; variable gep over [4 x i8] indexed by zext(%mul), followed by a constant byte |
| ; gep of 32 * 4 = 128. |
| %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| ; No explicit alignment is written on this load; the expected output shows align 8. |
| %load = load <2 x float>, ptr addrspace(1) %gep |
| ret <2 x float> %load |
| } |
| |
| ; Intrinsic called by @turn_add_into_gep. For the call with arguments (i32 -1, i32 0), |
| ; the expected output annotates the result with range(i32 0, 33). |
| declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; WAVE32: {{.*}} |
| ; WAVE64: {{.*}} |