; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S --passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Sink the GEP to make use of scalar+vector addressing modes.
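; Illustrative only (not checked by this test): with the GEP sunk next to the
; gather, isel can select an SVE scalar-base + vector-offset form, e.g.
;   ld1w { z0.s }, p0/z, [x0, z1.s, sxtw #2]
; instead of materialising a vector of pointers up front.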
define <vscale x 4 x float> @gather_offsets_sink_gep(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_gep(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT: [[RET:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP0]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[RET]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
entry:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Sink sext to make use of scalar+sxtw(vector) addressing modes.
define <vscale x 4 x float> @gather_offsets_sink_sext(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT: [[RET:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[PTRS]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[RET]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; As above, but ensure both the GEP and the sext are sunk.
define <vscale x 4 x float> @gather_offsets_sink_sext_get(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext_get(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT: [[RET:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP1]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[RET]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink GEPs that cannot benefit from SVE's scalar+vector addressing modes,
; i.e. where a vector of bases leaves no scalar base register to pair with the offsets.
define <vscale x 4 x float> @gather_no_scalar_base(<vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_no_scalar_base(
; CHECK-SAME: <vscale x 4 x ptr> [[BASES:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr float, <vscale x 4 x ptr> [[BASES]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[RET:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[PTRS]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[RET]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
entry:
  %ptrs = getelementptr float, <vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink extends whose result type is already favourable for SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_small(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_small(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i32>
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES_SEXT]]
; CHECK-NEXT: [[RET:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP0]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[RET]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i8> %indices to <vscale x 4 x i32>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink extends that cannot benefit from SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_big(ptr %base, <vscale x 4 x i48> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_big(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i48> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i48> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[INDICES_SEXT]]
; CHECK-NEXT: [[RET:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP0]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[RET]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i48> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Sink zext to make use of scalar+uxtw(vector) addressing modes.
; TODO: There's an argument for splitting the extend into i8->i32 and i32->i64,
; which would be especially useful if the i8s are the result of a load, because
; it would maintain the use of extending loads.
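; A sketch of that split (hypothetical; not what the pass emits today):
;   %idx.w = zext <vscale x 4 x i8> %indices to <vscale x 4 x i32>
;   %idx.d = zext <vscale x 4 x i32> %idx.w to <vscale x 4 x i64>
; The i8->i32 step could then fold into an extending load and the i32->i64
; step into the uxtw addressing mode.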
define <vscale x 4 x float> @gather_offset_sink_zext(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_sink_zext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = zext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT: [[RET:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[PTRS]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[RET]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.zext = zext <vscale x 4 x i8> %indices to <vscale x 4 x i64>
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.zext
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Ensure we support scatters as well as gathers.
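; Illustrative only (not checked by this test): the sunk scatter can likewise
; select a scalar-base + vector-offset store, e.g.
;   st1w { z0.s }, p0, [x0, z1.s, sxtw #2]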
define void @scatter_offsets_sink_sext_get(<vscale x 4 x float> %data, ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define void @scatter_offsets_sink_sext_get(
; CHECK-SAME: <vscale x 4 x float> [[DATA:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT: tail call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[DATA]], <vscale x 4 x ptr> align 4 [[TMP1]], <vscale x 4 x i1> [[MASK]])
; CHECK-NEXT: ret void
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  tail call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask)
  br label %exit

exit:
  ret void
}

declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)