| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc --mattr=+sve -aarch64-enable-sink-fold=true < %s -mtriple aarch64-apple-darwin | FileCheck %s |
| ; RUN: llc --mattr=+sve -aarch64-enable-sink-fold=true < %s -mtriple aarch64_be-unknown-unknown | FileCheck --check-prefix CHECK-BE %s |
| |
| ; 256-bit <4 x double> non-temporal load: the Darwin run expects one ldnp of |
| ; q0/q1, while the big-endian run expects an ordinary ldp. |
| define <4 x double> @test_ldnp_v4f64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v4f64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v4f64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <4 x double>, ptr %A, align 8, !nontemporal !0 |
| ret <4 x double> %lv |
| } |
| |
| ; 256-bit <4 x i64> non-temporal load: the Darwin run expects one ldnp of |
| ; q0/q1, while the big-endian run expects an ordinary ldp. |
| define <4 x i64> @test_ldnp_v4i64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v4i64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v4i64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <4 x i64>, ptr %A, align 8, !nontemporal !0 |
| ret <4 x i64> %lv |
| } |
| |
| ; 256-bit <8 x i32> non-temporal load: the Darwin run expects one ldnp of |
| ; q0/q1, while the big-endian run expects an ordinary ldp. |
| define <8 x i32> @test_ldnp_v8i32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v8i32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v8i32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <8 x i32>, ptr %A, align 8, !nontemporal !0 |
| ret <8 x i32> %lv |
| } |
| |
| ; 256-bit <8 x float> non-temporal load: the Darwin run expects one ldnp of |
| ; q0/q1, while the big-endian run expects an ordinary ldp. |
| define <8 x float> @test_ldnp_v8f32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v8f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v8f32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <8 x float>, ptr %A, align 8, !nontemporal !0 |
| ret <8 x float> %lv |
| } |
| |
| ; 256-bit <16 x i16> non-temporal load: the Darwin run expects one ldnp of |
| ; q0/q1, while the big-endian run expects an ordinary ldp. |
| define <16 x i16> @test_ldnp_v16i16(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v16i16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v16i16: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <16 x i16>, ptr %A, align 8, !nontemporal !0 |
| ret <16 x i16> %lv |
| } |
| |
| ; 256-bit <16 x half> non-temporal load: the Darwin run expects one ldnp of |
| ; q0/q1, while the big-endian run expects an ordinary ldp. |
| define <16 x half> @test_ldnp_v16f16(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v16f16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v16f16: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <16 x half>, ptr %A, align 8, !nontemporal !0 |
| ret <16 x half> %lv |
| } |
| |
| ; 256-bit <32 x i8> non-temporal load: the Darwin run expects one ldnp of |
| ; q0/q1, while the big-endian run expects an ordinary ldp. |
| define <32 x i8> @test_ldnp_v32i8(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v32i8: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v32i8: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <32 x i8>, ptr %A, align 8, !nontemporal !0 |
| ret <32 x i8> %lv |
| } |
| |
| ; 128-bit <4 x i32> non-temporal load: both runs expect a plain ldr of q0; no |
| ; ldnp is expected for a vector that fits in one q register. |
| define <4 x i32> @test_ldnp_v4i32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v4i32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v4i32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr q0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <4 x i32>, ptr %A, align 8, !nontemporal !0 |
| ret <4 x i32> %lv |
| } |
| |
| ; 128-bit <4 x float> non-temporal load: both runs expect a plain ldr of q0; |
| ; no ldnp is expected for a vector that fits in one q register. |
| define <4 x float> @test_ldnp_v4f32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v4f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v4f32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr q0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <4 x float>, ptr %A, align 8, !nontemporal !0 |
| ret <4 x float> %lv |
| } |
| |
| ; 128-bit <8 x i16> non-temporal load: both runs expect a plain ldr of q0. |
| define <8 x i16> @test_ldnp_v8i16(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v8i16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v8i16: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr q0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <8 x i16>, ptr %A, align 8, !nontemporal !0 |
| ret <8 x i16> %lv |
| } |
| |
| ; 128-bit <16 x i8> non-temporal load: both runs expect a plain ldr of q0. |
| define <16 x i8> @test_ldnp_v16i8(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v16i8: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v16i8: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr q0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <16 x i8>, ptr %A, align 8, !nontemporal !0 |
| ret <16 x i8> %lv |
| } |
| ; 128-bit <2 x double> non-temporal load: both runs expect a plain ldr of q0. |
| define <2 x double> @test_ldnp_v2f64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v2f64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v2f64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr q0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <2 x double>, ptr %A, align 8, !nontemporal !0 |
| ret <2 x double> %lv |
| } |
| |
| ; 64-bit <2 x i32> non-temporal load: both runs expect a plain ldr of d0. |
| define <2 x i32> @test_ldnp_v2i32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v2i32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v2i32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr d0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <2 x i32>, ptr %A, align 8, !nontemporal !0 |
| ret <2 x i32> %lv |
| } |
| |
| ; 64-bit <2 x float> non-temporal load: both runs expect a plain ldr of d0. |
| define <2 x float> @test_ldnp_v2f32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v2f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v2f32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr d0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <2 x float>, ptr %A, align 8, !nontemporal !0 |
| ret <2 x float> %lv |
| } |
| |
| ; 64-bit <4 x i16> non-temporal load: both runs expect a plain ldr of d0. |
| define <4 x i16> @test_ldnp_v4i16(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v4i16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v4i16: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr d0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <4 x i16>, ptr %A, align 8, !nontemporal !0 |
| ret <4 x i16> %lv |
| } |
| |
| ; 64-bit <8 x i8> non-temporal load: both runs expect a plain ldr of d0. |
| define <8 x i8> @test_ldnp_v8i8(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v8i8: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v8i8: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr d0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <8 x i8>, ptr %A, align 8, !nontemporal !0 |
| ret <8 x i8> %lv |
| } |
| |
| ; Single-element <1 x double> non-temporal load: both runs expect a plain ldr |
| ; of d0. |
| define <1 x double> @test_ldnp_v1f64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v1f64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v1f64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr d0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <1 x double>, ptr %A, align 8, !nontemporal !0 |
| ret <1 x double> %lv |
| } |
| |
| ; Single-element <1 x i64> non-temporal load: both runs expect a plain ldr of |
| ; d0. |
| define <1 x i64> @test_ldnp_v1i64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v1i64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v1i64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldr d0, [x0] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <1 x i64>, ptr %A, align 8, !nontemporal !0 |
| ret <1 x i64> %lv |
| } |
| |
| ; 512-bit <32 x i16> non-temporal load: the Darwin run expects two ldnp pairs |
| ; (offsets 0 and 32); the big-endian run expects two ordinary ldp pairs. |
| define <32 x i16> @test_ldnp_v32i16(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v32i16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ldnp q2, q3, [x0, #32] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v32i16: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ldp q2, q3, [x0, #32] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <32 x i16>, ptr %A, align 8, !nontemporal !0 |
| ret <32 x i16> %lv |
| } |
| |
| ; 512-bit <32 x half> non-temporal load: the Darwin run expects two ldnp pairs |
| ; (offsets 0 and 32); the big-endian run expects two ordinary ldp pairs. |
| define <32 x half> @test_ldnp_v32f16(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v32f16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ldnp q2, q3, [x0, #32] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v32f16: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ldp q2, q3, [x0, #32] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <32 x half>, ptr %A, align 8, !nontemporal !0 |
| ret <32 x half> %lv |
| } |
| |
| ; 512-bit <16 x i32> non-temporal load: the Darwin run expects two ldnp pairs |
| ; (offsets 0 and 32); the big-endian run expects two ordinary ldp pairs. |
| define <16 x i32> @test_ldnp_v16i32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v16i32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ldnp q2, q3, [x0, #32] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v16i32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ldp q2, q3, [x0, #32] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <16 x i32>, ptr %A, align 8, !nontemporal !0 |
| ret <16 x i32> %lv |
| } |
| |
| ; 512-bit <16 x float> non-temporal load: the Darwin run expects two ldnp |
| ; pairs (offsets 0 and 32); the big-endian run expects two ordinary ldp pairs. |
| define <16 x float> @test_ldnp_v16f32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v16f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ldnp q2, q3, [x0, #32] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v16f32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ldp q2, q3, [x0, #32] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <16 x float>, ptr %A, align 8, !nontemporal !0 |
| ret <16 x float> %lv |
| } |
| |
| ; Odd-sized <17 x float> non-temporal load. The Darwin run expects two ldnp |
| ; pairs for the first 16 floats plus a scalar ldr for the element at offset |
| ; 64, with the result written back through x8. The big-endian run expects |
| ; per-16-byte ld1/st1 instead of any pair instruction. |
| define <17 x float> @test_ldnp_v17f32(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v17f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0, #32] |
| ; CHECK-NEXT: ldr s2, [x0, #64] |
| ; CHECK-NEXT: ldnp q3, q4, [x0] |
| ; CHECK-NEXT: stp q0, q1, [x8, #32] |
| ; CHECK-NEXT: stp q3, q4, [x8] |
| ; CHECK-NEXT: str s2, [x8, #64] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v17f32: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: add x9, x0, #48 |
| ; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] |
| ; CHECK-BE-NEXT: add x10, x0, #32 |
| ; CHECK-BE-NEXT: ld1 { v2.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x0, #16 |
| ; CHECK-BE-NEXT: ldr s1, [x0, #64] |
| ; CHECK-BE-NEXT: ld1 { v3.4s }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v4.4s }, [x10] |
| ; CHECK-BE-NEXT: add x9, x8, #48 |
| ; CHECK-BE-NEXT: str s1, [x8, #64] |
| ; CHECK-BE-NEXT: add x10, x8, #32 |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x9] |
| ; CHECK-BE-NEXT: st1 { v4.4s }, [x10] |
| ; CHECK-BE-NEXT: st1 { v3.4s }, [x8] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <17 x float>, ptr %A, align 8, !nontemporal !0 |
| ret <17 x float> %lv |
| } |
| |
| ; Odd-sized <33 x double> non-temporal load. The Darwin run expects eight ldnp |
| ; pairs covering the first 256 bytes plus a scalar ldr for the element at |
| ; offset 256, with the result written back through x8. The big-endian run |
| ; expects sixteen ld1/st1 pairs and the same scalar tail, with no pair loads. |
| define <33 x double> @test_ldnp_v33f64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v33f64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ldr d20, [x0, #256] |
| ; CHECK-NEXT: ldnp q2, q3, [x0, #32] |
| ; CHECK-NEXT: ldnp q4, q5, [x0, #64] |
| ; CHECK-NEXT: ldnp q6, q7, [x0, #96] |
| ; CHECK-NEXT: ldnp q16, q17, [x0, #128] |
| ; CHECK-NEXT: ldnp q18, q19, [x0, #224] |
| ; CHECK-NEXT: ldnp q21, q22, [x0, #160] |
| ; CHECK-NEXT: ldnp q23, q24, [x0, #192] |
| ; CHECK-NEXT: stp q0, q1, [x8] |
| ; CHECK-NEXT: stp q2, q3, [x8, #32] |
| ; CHECK-NEXT: stp q4, q5, [x8, #64] |
| ; CHECK-NEXT: stp q6, q7, [x8, #96] |
| ; CHECK-NEXT: stp q16, q17, [x8, #128] |
| ; CHECK-NEXT: stp q21, q22, [x8, #160] |
| ; CHECK-NEXT: stp q23, q24, [x8, #192] |
| ; CHECK-NEXT: stp q18, q19, [x8, #224] |
| ; CHECK-NEXT: str d20, [x8, #256] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v33f64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: add x9, x0, #16 |
| ; CHECK-BE-NEXT: add x10, x0, #32 |
| ; CHECK-BE-NEXT: add x11, x0, #48 |
| ; CHECK-BE-NEXT: ld1 { v0.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x0, #64 |
| ; CHECK-BE-NEXT: ld1 { v1.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x0, #80 |
| ; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x0, #112 |
| ; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x0, #128 |
| ; CHECK-BE-NEXT: ld1 { v6.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x0, #160 |
| ; CHECK-BE-NEXT: ld1 { v7.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x0, #176 |
| ; CHECK-BE-NEXT: ld1 { v17.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x0, #240 |
| ; CHECK-BE-NEXT: ld1 { v2.2d }, [x11] |
| ; CHECK-BE-NEXT: add x11, x0, #96 |
| ; CHECK-BE-NEXT: ld1 { v18.2d }, [x10] |
| ; CHECK-BE-NEXT: ld1 { v20.2d }, [x0] |
| ; CHECK-BE-NEXT: ld1 { v22.2d }, [x9] |
| ; CHECK-BE-NEXT: add x10, x0, #224 |
| ; CHECK-BE-NEXT: ld1 { v5.2d }, [x11] |
| ; CHECK-BE-NEXT: add x11, x0, #144 |
| ; CHECK-BE-NEXT: ldr d21, [x0, #256] |
| ; CHECK-BE-NEXT: add x9, x0, #208 |
| ; CHECK-BE-NEXT: ld1 { v24.2d }, [x10] |
| ; CHECK-BE-NEXT: ld1 { v16.2d }, [x11] |
| ; CHECK-BE-NEXT: add x11, x0, #192 |
| ; CHECK-BE-NEXT: ld1 { v23.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #240 |
| ; CHECK-BE-NEXT: ld1 { v19.2d }, [x11] |
| ; CHECK-BE-NEXT: str d21, [x8, #256] |
| ; CHECK-BE-NEXT: st1 { v20.2d }, [x8] |
| ; CHECK-BE-NEXT: st1 { v22.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #224 |
| ; CHECK-BE-NEXT: st1 { v24.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #208 |
| ; CHECK-BE-NEXT: st1 { v23.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #192 |
| ; CHECK-BE-NEXT: st1 { v19.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #176 |
| ; CHECK-BE-NEXT: st1 { v18.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #160 |
| ; CHECK-BE-NEXT: st1 { v17.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #144 |
| ; CHECK-BE-NEXT: st1 { v16.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #128 |
| ; CHECK-BE-NEXT: st1 { v7.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #112 |
| ; CHECK-BE-NEXT: st1 { v6.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #96 |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #80 |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #64 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #48 |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x8, #32 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x9] |
| ; CHECK-BE-NEXT: st1 { v0.2d }, [x8] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <33 x double>, ptr %A, align 8, !nontemporal !0 |
| ret <33 x double> %lv |
| } |
| |
| ; Odd-sized <33 x i8> non-temporal load. The Darwin run expects one ldnp pair |
| ; for the first 32 bytes plus a byte load for the element at offset 32, with |
| ; the result written back through x8. The big-endian run expects two ld1 |
| ; loads and an ldrb instead of a pair instruction. |
| define <33 x i8> @test_ldnp_v33i8(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v33i8: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: add x9, x8, #32 |
| ; CHECK-NEXT: ldr b2, [x0, #32] |
| ; CHECK-NEXT: stp q0, q1, [x8] |
| ; CHECK-NEXT: st1.b { v2 }[0], [x9] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v33i8: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: add x9, x0, #16 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] |
| ; CHECK-BE-NEXT: ldrb w10, [x0, #32] |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] |
| ; CHECK-BE-NEXT: strb w10, [x8, #32] |
| ; CHECK-BE-NEXT: st1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: st1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <33 x i8>, ptr %A, align 8, !nontemporal !0 |
| ret <33 x i8> %lv |
| } |
| |
| ; <4 x i65> non-temporal load (element width is not a power of two). Neither |
| ; run expects an ldnp: both expect general-purpose ldp/ldr/ldrb loads followed |
| ; by extr/ubfx/and bit extraction of the individual elements. |
| define <4 x i65> @test_ldnp_v4i65(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v4i65: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldp x8, x9, [x0, #8] |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ldr x10, [x0, #24] |
| ; CHECK-NEXT: ldrb w11, [x0, #32] |
| ; CHECK-NEXT: and x1, x8, #0x1 |
| ; CHECK-NEXT: extr x2, x9, x8, #1 |
| ; CHECK-NEXT: extr x4, x10, x9, #2 |
| ; CHECK-NEXT: mov.d v0[1], x1 |
| ; CHECK-NEXT: extr x6, x11, x10, #3 |
| ; CHECK-NEXT: ubfx x3, x9, #1, #1 |
| ; CHECK-NEXT: ubfx x5, x10, #2, #1 |
| ; CHECK-NEXT: ubfx x7, x11, #3, #1 |
| ; CHECK-NEXT: fmov x0, d0 |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v4i65: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp x10, x9, [x0] |
| ; CHECK-BE-NEXT: ldrb w8, [x0, #32] |
| ; CHECK-BE-NEXT: ldp x12, x11, [x0, #16] |
| ; CHECK-BE-NEXT: lsr x13, x10, #56 |
| ; CHECK-BE-NEXT: orr x7, x8, x11, lsl #8 |
| ; CHECK-BE-NEXT: extr x8, x10, x9, #56 |
| ; CHECK-BE-NEXT: extr x11, x12, x11, #56 |
| ; CHECK-BE-NEXT: lsr x14, x12, #56 |
| ; CHECK-BE-NEXT: extr x15, x9, x12, #56 |
| ; CHECK-BE-NEXT: lsr x10, x10, #59 |
| ; CHECK-BE-NEXT: extr x1, x13, x8, #3 |
| ; CHECK-BE-NEXT: lsr x8, x9, #56 |
| ; CHECK-BE-NEXT: ubfx x12, x12, #57, #1 |
| ; CHECK-BE-NEXT: ubfx x9, x9, #58, #1 |
| ; CHECK-BE-NEXT: extr x5, x14, x11, #1 |
| ; CHECK-BE-NEXT: and x11, x11, #0x1 |
| ; CHECK-BE-NEXT: fmov d0, x10 |
| ; CHECK-BE-NEXT: fmov d2, x12 |
| ; CHECK-BE-NEXT: fmov d3, x11 |
| ; CHECK-BE-NEXT: fmov d1, x9 |
| ; CHECK-BE-NEXT: extr x3, x8, x15, #2 |
| ; CHECK-BE-NEXT: mov v0.d[1], x1 |
| ; CHECK-BE-NEXT: mov v2.d[1], x5 |
| ; CHECK-BE-NEXT: mov v3.d[1], x7 |
| ; CHECK-BE-NEXT: mov v1.d[1], x3 |
| ; CHECK-BE-NEXT: fmov x0, d0 |
| ; CHECK-BE-NEXT: fmov x4, d2 |
| ; CHECK-BE-NEXT: fmov x6, d3 |
| ; CHECK-BE-NEXT: fmov x2, d1 |
| ; CHECK-BE-NEXT: ret |
| %lv = load <4 x i65>, ptr %A, align 8, !nontemporal !0 |
| ret <4 x i65> %lv |
| } |
| |
| ; <4 x i63> non-temporal load (element width is not a power of two). Neither |
| ; run expects an ldnp: both expect general-purpose ldp/ldr loads followed by |
| ; extr plus a 63-bit mask to separate the elements. |
| define <4 x i63> @test_ldnp_v4i63(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v4i63: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldp x8, x9, [x0, #16] |
| ; CHECK-NEXT: ldp x10, x11, [x0] |
| ; CHECK-NEXT: extr x3, x9, x8, #61 |
| ; CHECK-NEXT: extr x9, x11, x10, #63 |
| ; CHECK-NEXT: extr x8, x8, x11, #62 |
| ; CHECK-NEXT: and x0, x10, #0x7fffffffffffffff |
| ; CHECK-NEXT: and x1, x9, #0x7fffffffffffffff |
| ; CHECK-NEXT: and x2, x8, #0x7fffffffffffffff |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v4i63: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp x9, x8, [x0, #8] |
| ; CHECK-BE-NEXT: ldr x11, [x0, #24] |
| ; CHECK-BE-NEXT: ldr x10, [x0] |
| ; CHECK-BE-NEXT: and x3, x11, #0x7fffffffffffffff |
| ; CHECK-BE-NEXT: extr x12, x9, x8, #62 |
| ; CHECK-BE-NEXT: extr x8, x8, x11, #63 |
| ; CHECK-BE-NEXT: extr x0, x10, x9, #61 |
| ; CHECK-BE-NEXT: and x1, x12, #0x7fffffffffffffff |
| ; CHECK-BE-NEXT: and x2, x8, #0x7fffffffffffffff |
| ; CHECK-BE-NEXT: ret |
| %lv = load <4 x i63>, ptr %A, align 8, !nontemporal !0 |
| ret <4 x i63> %lv |
| } |
| |
| ; Odd-sized <5 x double> non-temporal load. The Darwin run expects one ldnp |
| ; pair for the first four doubles plus a scalar ldr for the fifth, then ext |
| ; to split each q register into the d registers used for the return value. |
| ; The big-endian run expects two ld1 loads in place of the pair. |
| define <5 x double> @test_ldnp_v5f64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v5f64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q2, [x0] |
| ; CHECK-NEXT: ldr d4, [x0, #32] |
| ; CHECK-NEXT: ext.16b v1, v0, v0, #8 |
| ; CHECK-NEXT: ext.16b v3, v2, v2, #8 |
| ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q2 |
| ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q1 |
| ; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q3 |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v5f64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: add x8, x0, #16 |
| ; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] |
| ; CHECK-BE-NEXT: ldr d4, [x0, #32] |
| ; CHECK-BE-NEXT: ld1 { v2.2d }, [x8] |
| ; CHECK-BE-NEXT: // kill: def $d4 killed $d4 killed $q4 |
| ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-BE-NEXT: ext v3.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: // kill: def $d1 killed $d1 killed $q1 |
| ; CHECK-BE-NEXT: // kill: def $d2 killed $d2 killed $q2 |
| ; CHECK-BE-NEXT: // kill: def $d3 killed $d3 killed $q3 |
| ; CHECK-BE-NEXT: ret |
| %lv = load <5 x double>, ptr %A, align 8, !nontemporal !0 |
| ret <5 x double> %lv |
| } |
| |
| ; 1024-bit <16 x i64> non-temporal load: the Darwin run expects four ldnp |
| ; pairs (offsets 0, 32, 64, 96); the big-endian run expects four ordinary ldp |
| ; pairs at the same offsets. |
| define <16 x i64> @test_ldnp_v16i64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v16i64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ldnp q2, q3, [x0, #32] |
| ; CHECK-NEXT: ldnp q4, q5, [x0, #64] |
| ; CHECK-NEXT: ldnp q6, q7, [x0, #96] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v16i64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ldp q2, q3, [x0, #32] |
| ; CHECK-BE-NEXT: ldp q4, q5, [x0, #64] |
| ; CHECK-BE-NEXT: ldp q6, q7, [x0, #96] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <16 x i64>, ptr %A, align 8, !nontemporal !0 |
| ret <16 x i64> %lv |
| } |
| |
| ; 1024-bit <16 x double> non-temporal load: the Darwin run expects four ldnp |
| ; pairs (offsets 0, 32, 64, 96); the big-endian run expects four ordinary ldp |
| ; pairs at the same offsets. |
| define <16 x double> @test_ldnp_v16f64(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v16f64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ldnp q0, q1, [x0] |
| ; CHECK-NEXT: ldnp q2, q3, [x0, #32] |
| ; CHECK-NEXT: ldnp q4, q5, [x0, #64] |
| ; CHECK-NEXT: ldnp q6, q7, [x0, #96] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v16f64: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ldp q0, q1, [x0] |
| ; CHECK-BE-NEXT: ldp q2, q3, [x0, #32] |
| ; CHECK-BE-NEXT: ldp q4, q5, [x0, #64] |
| ; CHECK-BE-NEXT: ldp q6, q7, [x0, #96] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <16 x double>, ptr %A, align 8, !nontemporal !0 |
| ret <16 x double> %lv |
| } |
| |
| ; Scalable <vscale x 20 x float> non-temporal load. Neither run expects an |
| ; ldnp: both expect five SVE ld1w loads under an all-true predicate (ptrue), |
| ; addressed in multiples of the vector length. |
| define <vscale x 20 x float> @test_ldnp_v20f32_vscale(ptr %A) { |
| ; CHECK-LABEL: test_ldnp_v20f32_vscale: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl] |
| ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, #2, mul vl] |
| ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, #3, mul vl] |
| ; CHECK-NEXT: ld1w { z4.s }, p0/z, [x0, #4, mul vl] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_ldnp_v20f32_vscale: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: ptrue p0.s |
| ; CHECK-BE-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-BE-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl] |
| ; CHECK-BE-NEXT: ld1w { z2.s }, p0/z, [x0, #2, mul vl] |
| ; CHECK-BE-NEXT: ld1w { z3.s }, p0/z, [x0, #3, mul vl] |
| ; CHECK-BE-NEXT: ld1w { z4.s }, p0/z, [x0, #4, mul vl] |
| ; CHECK-BE-NEXT: ret |
| %lv = load <vscale x 20 x float>, ptr %A, align 8, !nontemporal !0 |
| ret <vscale x 20 x float> %lv |
| } |
| |
| ; Metadata node referenced by the !nontemporal attachment on each load above: |
| ; a single i32 with value 1. |
| !0 = !{i32 1} |