| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s |
| |
| ; Test the DUP (splat) optimization for zero-extending narrow scalar loads. |
| ; The value should be loaded directly into a vector register, avoiding a GPR->SIMD transfer. |
| |
| ; Splat of a zero-extended i8 load (i8 -> i16) across a <4 x i16> vector. |
| ; The expected output loads the byte straight into b0 and duplicates 16-bit |
| ; lane 0; no separate extend or GPR->SIMD move instruction is expected. |
| define <4 x i16> @test_dup_zextload_i8_v4i16(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i8_v4i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr b0, [x0] |
| ; CHECK-NEXT: dup v0.4h, v0.h[0] |
| ; CHECK-NEXT: ret |
| %load = load i8, ptr %p, align 1 |
| %ext = zext i8 %load to i16 |
| %vec = insertelement <4 x i16> poison, i16 %ext, i32 0 |
| %dup = shufflevector <4 x i16> %vec, <4 x i16> poison, <4 x i32> zeroinitializer |
| ret <4 x i16> %dup |
| } |
| |
| ; Splat of a zero-extended i8 load (i8 -> i16) across a <8 x i16> vector. |
| ; Same pattern as the <4 x i16> case, but the expected DUP uses the .8h |
| ; arrangement for the wider result type. |
| define <8 x i16> @test_dup_zextload_i8_v8i16(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i8_v8i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr b0, [x0] |
| ; CHECK-NEXT: dup v0.8h, v0.h[0] |
| ; CHECK-NEXT: ret |
| %load = load i8, ptr %p, align 1 |
| %ext = zext i8 %load to i16 |
| %vec = insertelement <8 x i16> poison, i16 %ext, i32 0 |
| %dup = shufflevector <8 x i16> %vec, <8 x i16> poison, <8 x i32> zeroinitializer |
| ret <8 x i16> %dup |
| } |
| |
| ; Splat of a zero-extended i8 load (i8 -> i32) across a <2 x i32> vector. |
| ; The expected output is a byte load into b0 followed by a DUP of 32-bit |
| ; lane 0, with no separate extend instruction. |
| define <2 x i32> @test_dup_zextload_i8_v2i32(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i8_v2i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr b0, [x0] |
| ; CHECK-NEXT: dup v0.2s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %load = load i8, ptr %p, align 1 |
| %ext = zext i8 %load to i32 |
| %vec = insertelement <2 x i32> poison, i32 %ext, i32 0 |
| %dup = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer |
| ret <2 x i32> %dup |
| } |
| |
| ; Splat of a zero-extended i8 load (i8 -> i32) across a <4 x i32> vector. |
| ; Baseline for the two addressing-mode variants of this pattern (_offset and |
| ; _reg_offset): here the load address is the unmodified pointer argument. |
| define <4 x i32> @test_dup_zextload_i8_v4i32(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i8_v4i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr b0, [x0] |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %load = load i8, ptr %p, align 1 |
| %ext = zext i8 %load to i32 |
| %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 |
| %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer |
| ret <4 x i32> %dup |
| } |
| |
| ; Splat of a zero-extended i8 load into <4 x i32>, loading from a constant |
| ; offset of 4 bytes past %p. The offset is expected to be folded into the |
| ; load as an immediate (#4) rather than computed by a separate add. |
| define <4 x i32> @test_dup_zextload_i8_v4i32_offset(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i8_v4i32_offset: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr b0, [x0, #4] |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %addr = getelementptr inbounds i8, ptr %p, i64 4 |
| %load = load i8, ptr %addr, align 1 |
| %ext = zext i8 %load to i32 |
| %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 |
| %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer |
| ret <4 x i32> %dup |
| } |
| |
| ; Splat of a zero-extended i8 load into <4 x i32>, loading from %p plus a |
| ; run-time byte offset. The expected load uses register-offset addressing |
| ; ([x0, x1]) instead of a separate address computation. |
| define <4 x i32> @test_dup_zextload_i8_v4i32_reg_offset(ptr %p, i64 %offset) { |
| ; CHECK-LABEL: test_dup_zextload_i8_v4i32_reg_offset: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr b0, [x0, x1] |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %addr = getelementptr inbounds i8, ptr %p, i64 %offset |
| %load = load i8, ptr %addr, align 1 |
| %ext = zext i8 %load to i32 |
| %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 |
| %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer |
| ret <4 x i32> %dup |
| } |
| |
| ; Splat of a zero-extended i8 load (i8 -> i64) across a <2 x i64> vector. |
| ; This is the widest extension from i8 in this file; the expected output is |
| ; still a single byte load into b0 followed by a DUP of 64-bit lane 0. |
| define <2 x i64> @test_dup_zextload_i8_v2i64(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i8_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr b0, [x0] |
| ; CHECK-NEXT: dup v0.2d, v0.d[0] |
| ; CHECK-NEXT: ret |
| %load = load i8, ptr %p, align 1 |
| %ext = zext i8 %load to i64 |
| %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 |
| %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer |
| ret <2 x i64> %dup |
| } |
| |
| ; Splat of a zero-extended i16 load (i16 -> i32) across a <2 x i32> vector. |
| ; The load is declared with align 1. The expected output is a halfword load |
| ; into h0 followed by a DUP of 32-bit lane 0. |
| define <2 x i32> @test_dup_zextload_i16_v2i32(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i16_v2i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr h0, [x0] |
| ; CHECK-NEXT: dup v0.2s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %load = load i16, ptr %p, align 1 |
| %ext = zext i16 %load to i32 |
| %vec = insertelement <2 x i32> poison, i32 %ext, i32 0 |
| %dup = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer |
| ret <2 x i32> %dup |
| } |
| |
| ; Splat of a zero-extended i16 load (i16 -> i32) across a <4 x i32> vector. |
| ; Baseline for the two addressing-mode variants of this pattern (_offset and |
| ; _reg_offset): here the load address is the unmodified pointer argument. |
| define <4 x i32> @test_dup_zextload_i16_v4i32(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i16_v4i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr h0, [x0] |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %load = load i16, ptr %p, align 1 |
| %ext = zext i16 %load to i32 |
| %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 |
| %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer |
| ret <4 x i32> %dup |
| } |
| |
| ; Splat of a zero-extended i16 load into <4 x i32>, loading i16 element 4 |
| ; relative to %p. That is a byte offset of 8, which is expected to be folded |
| ; into the load as the immediate #8. |
| define <4 x i32> @test_dup_zextload_i16_v4i32_offset(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i16_v4i32_offset: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr h0, [x0, #8] |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %addr = getelementptr inbounds i16, ptr %p, i64 4 |
| %load = load i16, ptr %addr, align 1 |
| %ext = zext i16 %load to i32 |
| %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 |
| %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer |
| ret <4 x i32> %dup |
| } |
| |
| ; Splat of a zero-extended i16 load into <4 x i32>, loading the i16 element |
| ; at run-time index %offset relative to %p. The index scaling by the 2-byte |
| ; element size is expected to be folded into the load as "lsl #1". |
| define <4 x i32> @test_dup_zextload_i16_v4i32_reg_offset(ptr %p, i64 %offset) { |
| ; CHECK-LABEL: test_dup_zextload_i16_v4i32_reg_offset: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr h0, [x0, x1, lsl #1] |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %addr = getelementptr inbounds i16, ptr %p, i64 %offset |
| %load = load i16, ptr %addr, align 1 |
| %ext = zext i16 %load to i32 |
| %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 |
| %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer |
| ret <4 x i32> %dup |
| } |
| |
| ; Splat of a zero-extended i16 load (i16 -> i64) across a <2 x i64> vector. |
| ; The expected output is a halfword load into h0 followed by a DUP of 64-bit |
| ; lane 0, with no separate extend instruction. |
| define <2 x i64> @test_dup_zextload_i16_v2i64(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i16_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr h0, [x0] |
| ; CHECK-NEXT: dup v0.2d, v0.d[0] |
| ; CHECK-NEXT: ret |
| %load = load i16, ptr %p, align 1 |
| %ext = zext i16 %load to i64 |
| %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 |
| %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer |
| ret <2 x i64> %dup |
| } |
| |
| ; Splat of a zero-extended i32 load (i32 -> i64) across a <2 x i64> vector. |
| ; The load is declared with align 1. The expected output is a word load into |
| ; s0 followed by a DUP of 64-bit lane 0, with no separate extend instruction. |
| define <2 x i64> @test_dup_zextload_i32_v2i64(ptr %p) { |
| ; CHECK-LABEL: test_dup_zextload_i32_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr s0, [x0] |
| ; CHECK-NEXT: dup v0.2d, v0.d[0] |
| ; CHECK-NEXT: ret |
| %load = load i32, ptr %p, align 1 |
| %ext = zext i32 %load to i64 |
| %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 |
| %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer |
| ret <2 x i64> %dup |
| } |