| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: opt -S -mtriple=amdgcn-- -passes=separate-const-offset-from-gep,slsr,gvn < %s | FileCheck %s |
| |
| target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" |
| |
| |
| ; Two loads through addrspace(1) pointers at float indices %i + 1023 and |
| ; 2 * %i + 1023. The shared constant index 1023 is a byte offset of |
| ; 1023 * 4 = 4092. The autogenerated assertions below expect that constant |
| ; to be split out into a trailing i8 GEP of 4092, and the second address to |
| ; be built from the first variable base plus (sext %i) << 2. |
| ; NOTE(review): the function name suggests 4092 is the largest dword-aligned |
| ; offset that still fits the MUBUF immediate offset field -- confirm against |
| ; the target's addressing-mode rules. |
define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(ptr addrspace(1) %out, ptr addrspace(1) noalias %arr, i32 %i) { |
| ; CHECK-LABEL: define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) noalias [[ARR:%.*]], i32 [[I:%.*]]) { |
| ; CHECK-NEXT: bb: |
| ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[ARR]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[P12:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP1]], i64 4092 |
| ; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(1) [[P12]], align 4 |
| ; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP1]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[P24:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP3]], i64 4092 |
| ; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(1) [[P24]], align 4 |
| ; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| bb: |
| ; %i2 = 2 * %i, used as the variable part of the second index. |
| %i2 = shl nsw i32 %i, 1 |
| ; First access: element %i + 1023 of %arr, copied to %out. |
| %j1 = add nsw i32 %i, 1023 |
| %tmp = sext i32 %j1 to i64 |
| %p1 = getelementptr inbounds float, ptr addrspace(1) %arr, i64 %tmp |
| %v11 = load i32, ptr addrspace(1) %p1, align 4 |
| store i32 %v11, ptr addrspace(1) %out, align 4 |
| |
| ; Second access: element 2 * %i + 1023 of %arr (same constant as above). |
| %j2 = add nsw i32 %i2, 1023 |
| %tmp5 = sext i32 %j2 to i64 |
| %p2 = getelementptr inbounds float, ptr addrspace(1) %arr, i64 %tmp5 |
| %v22 = load i32, ptr addrspace(1) %p2, align 4 |
| store i32 %v22, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; Same access pattern as the 1023 variant, but with constant index 1024, |
| ; i.e. a byte offset of 1024 * 4 = 4096. The autogenerated assertions below |
| ; expect the constant to stay inside the i32 add and both GEPs to keep their |
| ; original "inbounds float" form; the only rewrite is that %j2 becomes |
| ; %j1 + %i (emitted without the nsw flag). |
| ; NOTE(review): the function name suggests 4096 is just past the MUBUF |
| ; immediate offset limit, which would explain why the constant is not split |
| ; out -- confirm against the target's addressing-mode rules. |
define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(ptr addrspace(1) %out, ptr addrspace(1) noalias %arr, i32 %i) { |
| ; CHECK-LABEL: define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) noalias [[ARR:%.*]], i32 [[I:%.*]]) { |
| ; CHECK-NEXT: bb: |
| ; CHECK-NEXT: [[J1:%.*]] = add nsw i32 [[I]], 1024 |
| ; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[J1]] to i64 |
| ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARR]], i64 [[TMP]] |
| ; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(1) [[P1]], align 4 |
| ; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: [[J2:%.*]] = add i32 [[J1]], [[I]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[J2]] to i64 |
| ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARR]], i64 [[TMP5]] |
| ; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(1) [[P2]], align 4 |
| ; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| bb: |
| ; %i2 = 2 * %i, used as the variable part of the second index. |
| %i2 = shl nsw i32 %i, 1 |
| ; First access: element %i + 1024 of %arr, copied to %out. |
| %j1 = add nsw i32 %i, 1024 |
| %tmp = sext i32 %j1 to i64 |
| %p1 = getelementptr inbounds float, ptr addrspace(1) %arr, i64 %tmp |
| %v11 = load i32, ptr addrspace(1) %p1, align 4 |
| store i32 %v11, ptr addrspace(1) %out, align 4 |
| |
| ; Second access: element 2 * %i + 1024 of %arr (same constant as above). |
| %j2 = add nsw i32 %i2, 1024 |
| %tmp5 = sext i32 %j2 to i64 |
| %p2 = getelementptr inbounds float, ptr addrspace(1) %arr, i64 %tmp5 |
| %v22 = load i32, ptr addrspace(1) %p2, align 4 |
| store i32 %v22, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| |
| ; Two loads through addrspace(3) pointers at float indices %i + 16383 and |
| ; 2 * %i + 16383, each stored to the addrspace(1) pointer %out. The index is |
| ; i32 here, so no sext is involved. The shared constant index 16383 is a byte |
| ; offset of 16383 * 4 = 65532. The autogenerated assertions below expect that |
| ; constant to be split out into a trailing i8 GEP of 65532, and the second |
| ; address to be built from the first variable base plus %i << 2. |
| ; NOTE(review): the function name suggests 65532 is the largest dword-aligned |
| ; offset that still fits the DS instruction offset field -- confirm against |
| ; the target's addressing-mode rules. |
define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(ptr addrspace(1) %out, ptr addrspace(3) noalias %arr, i32 %i) { |
| ; CHECK-LABEL: define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(3) noalias [[ARR:%.*]], i32 [[I:%.*]]) { |
| ; CHECK-NEXT: bb: |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr addrspace(3) [[ARR]], i32 [[I]] |
| ; CHECK-NEXT: [[P12:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 65532 |
| ; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(3) [[P12]], align 4 |
| ; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[I]], 2 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 [[TMP1]] |
| ; CHECK-NEXT: [[P24:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 65532 |
| ; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(3) [[P24]], align 4 |
| ; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| bb: |
| ; %i2 = 2 * %i, used as the variable part of the second index. |
| %i2 = shl nsw i32 %i, 1 |
| ; First access: element %i + 16383 of %arr, copied to %out. |
| %j1 = add nsw i32 %i, 16383 |
| %p1 = getelementptr inbounds float, ptr addrspace(3) %arr, i32 %j1 |
| %v11 = load i32, ptr addrspace(3) %p1, align 4 |
| store i32 %v11, ptr addrspace(1) %out, align 4 |
| |
| ; Second access: element 2 * %i + 16383 of %arr (same constant as above). |
| %j2 = add nsw i32 %i2, 16383 |
| %p2 = getelementptr inbounds float, ptr addrspace(3) %arr, i32 %j2 |
| %v22 = load i32, ptr addrspace(3) %p2, align 4 |
| store i32 %v22, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; Same addrspace(3) access pattern as the 16383 variant, but with constant |
| ; index 16384, i.e. a byte offset of 16384 * 4 = 65536. The autogenerated |
| ; assertions below expect the constant to stay inside the i32 add and both |
| ; GEPs to keep their original "inbounds float" form; the only rewrite is that |
| ; %j2 becomes %j1 + %i (emitted without the nsw flag). |
| ; NOTE(review): the function name suggests 65536 is just past the DS |
| ; instruction offset limit, which would explain why the constant is not split |
| ; out -- confirm against the target's addressing-mode rules. |
define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset(ptr addrspace(1) %out, ptr addrspace(3) noalias %arr, i32 %i) { |
| ; CHECK-LABEL: define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(3) noalias [[ARR:%.*]], i32 [[I:%.*]]) { |
| ; CHECK-NEXT: bb: |
| ; CHECK-NEXT: [[J1:%.*]] = add nsw i32 [[I]], 16384 |
| ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[ARR]], i32 [[J1]] |
| ; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(3) [[P1]], align 4 |
| ; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: [[J2:%.*]] = add i32 [[J1]], [[I]] |
| ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[ARR]], i32 [[J2]] |
| ; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(3) [[P2]], align 4 |
| ; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| bb: |
| ; %i2 = 2 * %i, used as the variable part of the second index. |
| %i2 = shl nsw i32 %i, 1 |
| ; First access: element %i + 16384 of %arr, copied to %out. |
| %j1 = add nsw i32 %i, 16384 |
| %p1 = getelementptr inbounds float, ptr addrspace(3) %arr, i32 %j1 |
| %v11 = load i32, ptr addrspace(3) %p1, align 4 |
| store i32 %v11, ptr addrspace(1) %out, align 4 |
| |
| ; Second access: element 2 * %i + 16384 of %arr (same constant as above). |
| %j2 = add nsw i32 %i2, 16384 |
| %p2 = getelementptr inbounds float, ptr addrspace(3) %arr, i32 %j2 |
| %v22 = load i32, ptr addrspace(3) %p2, align 4 |
| store i32 %v22, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |