; blob: cf529343c6728be9813667344695929a39eacee1 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
; Test lowering of a DUP (splat) whose scalar operand is a zero-extended narrow load.
; The load should go directly into a SIMD/FP register, avoiding a GPR->SIMD transfer.
; Splat of an i8 load, zero-extended to i16, across <4 x i16>.
; Expected output: a scalar load into b0 followed by a lane-0 DUP, with no
; separate extend instruction and no general-purpose register in between.
define <4 x i16> @test_dup_zextload_i8_v4i16(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i8_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
  %byte = load i8, ptr %p, align 1
  %wide = zext i8 %byte to i16
  ; Canonical splat idiom: insert into lane 0, then broadcast lane 0.
  %lane0 = insertelement <4 x i16> poison, i16 %wide, i32 0
  %splat = shufflevector <4 x i16> %lane0, <4 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i16> %splat
}
; Same pattern as the 64-bit <4 x i16> case, but for the 128-bit vector type
; <8 x i16>: i8 load -> zext to i16 -> splat over all eight lanes.
; Expected output: load into b0, then DUP of halfword lane 0 into v0.8h.
define <8 x i16> @test_dup_zextload_i8_v8i16(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i8_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
  %byte = load i8, ptr %p, align 1
  %wide = zext i8 %byte to i16
  %lane0 = insertelement <8 x i16> poison, i16 %wide, i32 0
  %splat = shufflevector <8 x i16> %lane0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %splat
}
; i8 load zero-extended to i32 and splatted across <2 x i32>.
; Expected output: load into b0, then DUP of word lane 0 into v0.2s.
define <2 x i32> @test_dup_zextload_i8_v2i32(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i8_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: dup v0.2s, v0.s[0]
; CHECK-NEXT: ret
  %byte = load i8, ptr %p, align 1
  %wide = zext i8 %byte to i32
  %lane0 = insertelement <2 x i32> poison, i32 %wide, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> poison, <2 x i32> <i32 0, i32 0>
  ret <2 x i32> %splat
}
; i8 load zero-extended to i32 and splatted across <4 x i32>.
; Expected output: load into b0, then DUP of word lane 0 into v0.4s.
define <4 x i32> @test_dup_zextload_i8_v4i32(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i8_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
  %byte = load i8, ptr %p, align 1
  %wide = zext i8 %byte to i32
  %lane0 = insertelement <4 x i32> poison, i32 %wide, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %splat
}
; Variant of the i8 -> <4 x i32> splat where the load address is %p plus a
; constant 4 bytes. The expected output folds that constant into the load's
; immediate-offset addressing mode ([x0, #4]).
define <4 x i32> @test_dup_zextload_i8_v4i32_offset(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i8_v4i32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, #4]
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i64 4
  %byte = load i8, ptr %elt.addr, align 1
  %wide = zext i8 %byte to i32
  %lane0 = insertelement <4 x i32> poison, i32 %wide, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %splat
}
; Variant of the i8 -> <4 x i32> splat where the load address is %p plus a
; runtime byte offset. The expected output uses the register-offset
; addressing mode ([x0, x1]) rather than a separate address computation.
define <4 x i32> @test_dup_zextload_i8_v4i32_reg_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: test_dup_zextload_i8_v4i32_reg_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, x1]
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i64 %offset
  %byte = load i8, ptr %elt.addr, align 1
  %wide = zext i8 %byte to i32
  %lane0 = insertelement <4 x i32> poison, i32 %wide, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %splat
}
; Widest extension of a byte in this file: i8 load zero-extended to i64 and
; splatted across <2 x i64>.
; Expected output: load into b0, then DUP of doubleword lane 0 into v0.2d.
define <2 x i64> @test_dup_zextload_i8_v2i64(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i8_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: dup v0.2d, v0.d[0]
; CHECK-NEXT: ret
  %byte = load i8, ptr %p, align 1
  %wide = zext i8 %byte to i64
  %lane0 = insertelement <2 x i64> poison, i64 %wide, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> poison, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %splat
}
; i16 load (declared with align 1) zero-extended to i32 and splatted across
; <2 x i32>.
; Expected output: load into h0, then DUP of word lane 0 into v0.2s.
define <2 x i32> @test_dup_zextload_i16_v2i32(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i16_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: dup v0.2s, v0.s[0]
; CHECK-NEXT: ret
  %half = load i16, ptr %p, align 1
  %wide = zext i16 %half to i32
  %lane0 = insertelement <2 x i32> poison, i32 %wide, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> poison, <2 x i32> <i32 0, i32 0>
  ret <2 x i32> %splat
}
; i16 load (declared with align 1) zero-extended to i32 and splatted across
; <4 x i32>.
; Expected output: load into h0, then DUP of word lane 0 into v0.4s.
define <4 x i32> @test_dup_zextload_i16_v4i32(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i16_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
  %half = load i16, ptr %p, align 1
  %wide = zext i16 %half to i32
  %lane0 = insertelement <4 x i32> poison, i32 %wide, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %splat
}
; Variant of the i16 -> <4 x i32> splat where the load address is element 4
; of an i16 array at %p, i.e. a constant 8-byte displacement. The expected
; output folds it into the load's immediate offset ([x0, #8]).
define <4 x i32> @test_dup_zextload_i16_v4i32_offset(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i16_v4i32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, #8]
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i16, ptr %p, i64 4
  %half = load i16, ptr %elt.addr, align 1
  %wide = zext i16 %half to i32
  %lane0 = insertelement <4 x i32> poison, i32 %wide, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %splat
}
; Variant of the i16 -> <4 x i32> splat where the load address is a runtime
; index into an i16 array at %p. The expected output folds the index scaling
; into the addressing mode ([x0, x1, lsl #1]).
define <4 x i32> @test_dup_zextload_i16_v4i32_reg_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: test_dup_zextload_i16_v4i32_reg_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i16, ptr %p, i64 %offset
  %half = load i16, ptr %elt.addr, align 1
  %wide = zext i16 %half to i32
  %lane0 = insertelement <4 x i32> poison, i32 %wide, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %splat
}
; i16 load (declared with align 1) zero-extended to i64 and splatted across
; <2 x i64>.
; Expected output: load into h0, then DUP of doubleword lane 0 into v0.2d.
define <2 x i64> @test_dup_zextload_i16_v2i64(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i16_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: dup v0.2d, v0.d[0]
; CHECK-NEXT: ret
  %half = load i16, ptr %p, align 1
  %wide = zext i16 %half to i64
  %lane0 = insertelement <2 x i64> poison, i64 %wide, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> poison, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %splat
}
; i32 load (declared with align 1) zero-extended to i64 and splatted across
; <2 x i64>.
; Expected output: load into s0, then DUP of doubleword lane 0 into v0.2d.
define <2 x i64> @test_dup_zextload_i32_v2i64(ptr %p) {
; CHECK-LABEL: test_dup_zextload_i32_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: dup v0.2d, v0.d[0]
; CHECK-NEXT: ret
  %word = load i32, ptr %p, align 1
  %wide = zext i32 %word to i64
  %lane0 = insertelement <2 x i64> poison, i64 %wide, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> poison, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %splat
}