blob: 6df8d2be6deaba0b836187ad7ad5858139afe1b4 [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios -mattr=+sve -o - %s | FileCheck %s --check-prefix=CHECK-LE-SD
; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+sve -o - %s | FileCheck %s --check-prefix=CHECK-BE
; RUN: llc -mtriple=arm64-apple-ios -global-isel -mattr=+sve -o - %s | FileCheck %s --check-prefix=CHECK-LE-GI
; zext_of_concat: interleave the lanes of a widened <2 x i32> sum with zero
; lanes, then accumulate the result into the <4 x i32> stored at %c.
;
;   %a, %b : each is loaded as a <2 x i32>; the two vectors are added.
;   %c     : loaded as <4 x i32>, added to the shuffled value, stored back.
;   %d     : not referenced by the body.
;
; The second shuffle's mask <0, 5, 1, 7> yields { %i1[0], 0, %i1[1], 0 }
; (mask indices 4..7 select from the zeroinitializer operand), i.e. each
; source element is followed by a zero lane. The expected little-endian
; SelectionDAG output below realises this with a single ushll; the other two
; configurations use zip1/trn2 against a zero register or a tbl lookup.
define void @zext_of_concat(ptr %a, ptr %b, ptr %c, ptr %d) nounwind {
; CHECK-LE-SD-LABEL: zext_of_concat:
; CHECK-LE-SD: ; %bb.0:
; CHECK-LE-SD-NEXT: ldr d0, [x0]
; CHECK-LE-SD-NEXT: ldr d1, [x1]
; CHECK-LE-SD-NEXT: add.2s v0, v0, v1
; CHECK-LE-SD-NEXT: ldr q1, [x2]
; CHECK-LE-SD-NEXT: ushll.2d v0, v0, #0
; CHECK-LE-SD-NEXT: add.4s v0, v0, v1
; CHECK-LE-SD-NEXT: str q0, [x2]
; CHECK-LE-SD-NEXT: ret
;
; CHECK-BE-LABEL: zext_of_concat:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ld1 { v0.2s }, [x0]
; CHECK-BE-NEXT: ld1 { v1.2s }, [x1]
; CHECK-BE-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-BE-NEXT: movi v1.2d, #0000000000000000
; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v0.4s
; CHECK-BE-NEXT: trn2 v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: ld1 { v1.4s }, [x2]
; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x2]
; CHECK-BE-NEXT: ret
;
; CHECK-LE-GI-LABEL: zext_of_concat:
; CHECK-LE-GI: ; %bb.0:
; CHECK-LE-GI-NEXT: ldr d0, [x0]
; CHECK-LE-GI-NEXT: ldr d1, [x1]
; CHECK-LE-GI-NEXT: movi.2d v3, #0000000000000000
; CHECK-LE-GI-NEXT: Lloh0:
; CHECK-LE-GI-NEXT: adrp x8, lCPI0_0@PAGE
; CHECK-LE-GI-NEXT: add.2s v2, v0, v1
; CHECK-LE-GI-NEXT: Lloh1:
; CHECK-LE-GI-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF]
; CHECK-LE-GI-NEXT: ldr q1, [x2]
; CHECK-LE-GI-NEXT: tbl.16b v0, { v2, v3 }, v0
; CHECK-LE-GI-NEXT: add.4s v0, v0, v1
; CHECK-LE-GI-NEXT: str q0, [x2]
; CHECK-LE-GI-NEXT: ret
; CHECK-LE-GI-NEXT: .loh AdrpLdr Lloh0, Lloh1
%i0.a = load <2 x i32>, ptr %a
%i0.b = load <2 x i32>, ptr %b
%i0 = add <2 x i32> %i0.a, %i0.b
; Widen to four lanes; lanes 2 and 3 are left undefined.
%i1 = shufflevector <2 x i32> %i0, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Interleave the two defined lanes with zero lanes: { i1[0], 0, i1[1], 0 }.
%i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%i3 = load <4 x i32>, ptr %c
%i4 = add <4 x i32> %i2, %i3
store <4 x i32> %i4, ptr %c
ret void
}
; zext_of_concat_extrause: same zero-interleaving shuffle as a <2 x i32> sum
; widened to four lanes, but the widened vector has a second use: it is also
; stored to %e before being shuffled.
;
;   %a, %b : each is loaded as a <2 x i32>; the two vectors are added.
;   %c     : loaded as <4 x i32>, added to the shuffled value, stored back.
;   %d     : not referenced by the body.
;   %e     : receives the <4 x i32> concatenation of the sum with itself.
;
; Because %i1 is stored, all four of its lanes must be materialised. The
; expected output for every configuration below therefore builds the full
; 128-bit concatenation (mov of d[0] into d[1]) and then forms the
; zero-interleaved value separately (zip1 with zero, zip1+trn2, or tbl).
define void @zext_of_concat_extrause(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) nounwind {
; CHECK-LE-SD-LABEL: zext_of_concat_extrause:
; CHECK-LE-SD: ; %bb.0:
; CHECK-LE-SD-NEXT: ldr d0, [x1]
; CHECK-LE-SD-NEXT: ldr d1, [x0]
; CHECK-LE-SD-NEXT: add.2s v0, v1, v0
; CHECK-LE-SD-NEXT: movi.2d v1, #0000000000000000
; CHECK-LE-SD-NEXT: mov.d v0[1], v0[0]
; CHECK-LE-SD-NEXT: zip1.4s v1, v0, v1
; CHECK-LE-SD-NEXT: str q0, [x4]
; CHECK-LE-SD-NEXT: ldr q0, [x2]
; CHECK-LE-SD-NEXT: add.4s v0, v1, v0
; CHECK-LE-SD-NEXT: str q0, [x2]
; CHECK-LE-SD-NEXT: ret
;
; CHECK-BE-LABEL: zext_of_concat_extrause:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ld1 { v0.2s }, [x1]
; CHECK-BE-NEXT: ld1 { v1.2s }, [x0]
; CHECK-BE-NEXT: movi v2.2d, #0000000000000000
; CHECK-BE-NEXT: add v0.2s, v1.2s, v0.2s
; CHECK-BE-NEXT: mov v0.d[1], v0.d[0]
; CHECK-BE-NEXT: zip1 v1.4s, v0.4s, v0.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x4]
; CHECK-BE-NEXT: trn2 v0.4s, v1.4s, v2.4s
; CHECK-BE-NEXT: ld1 { v1.4s }, [x2]
; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x2]
; CHECK-BE-NEXT: ret
;
; CHECK-LE-GI-LABEL: zext_of_concat_extrause:
; CHECK-LE-GI: ; %bb.0:
; CHECK-LE-GI-NEXT: ldr d0, [x0]
; CHECK-LE-GI-NEXT: ldr d1, [x1]
; CHECK-LE-GI-NEXT: movi.2d v3, #0000000000000000
; CHECK-LE-GI-NEXT: Lloh2:
; CHECK-LE-GI-NEXT: adrp x8, lCPI1_0@PAGE
; CHECK-LE-GI-NEXT: add.2s v2, v0, v1
; CHECK-LE-GI-NEXT: Lloh3:
; CHECK-LE-GI-NEXT: ldr q0, [x8, lCPI1_0@PAGEOFF]
; CHECK-LE-GI-NEXT: mov.d v2[1], v2[0]
; CHECK-LE-GI-NEXT: tbl.16b v0, { v2, v3 }, v0
; CHECK-LE-GI-NEXT: str q2, [x4]
; CHECK-LE-GI-NEXT: ldr q1, [x2]
; CHECK-LE-GI-NEXT: add.4s v0, v0, v1
; CHECK-LE-GI-NEXT: str q0, [x2]
; CHECK-LE-GI-NEXT: ret
; CHECK-LE-GI-NEXT: .loh AdrpLdr Lloh2, Lloh3
%i0.a = load <2 x i32>, ptr %a
%i0.b = load <2 x i32>, ptr %b
%i0 = add <2 x i32> %i0.a, %i0.b
; Concatenate the sum with itself: { i0[0], i0[1], i0[0], i0[1] }.
%i1 = shufflevector <2 x i32> %i0, <2 x i32> %i0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Extra use of the concatenation: every lane of %i1 is observable.
store <4 x i32> %i1, ptr %e
; Interleave the low two lanes with zero lanes: { i1[0], 0, i1[1], 0 }.
%i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%i3 = load <4 x i32>, ptr %c
%i4 = add <4 x i32> %i2, %i3
store <4 x i32> %i4, ptr %c
ret void
}
; aext_of_concat: spread the two lanes of a <2 x i32> sum into the even lanes
; of a <4 x i32>, leaving the odd lanes undefined, then accumulate the result
; into the <4 x i32> stored at %c.
;
;   %a, %b : each is loaded as a <2 x i32>; the two vectors are added.
;   %c     : loaded as <4 x i32>, added to the shuffled value, stored back.
;   %d     : not referenced by the body.
;
; The second shuffle's mask <0, undef, 1, undef> only constrains lanes 0 and
; 2, so any content is acceptable in lanes 1 and 3. The expected output below
; uses ushll for little-endian SelectionDAG and a single zip1 of the value
; with itself for the other two configurations.
define void @aext_of_concat(ptr %a, ptr %b, ptr %c, ptr %d) nounwind {
; CHECK-LE-SD-LABEL: aext_of_concat:
; CHECK-LE-SD: ; %bb.0:
; CHECK-LE-SD-NEXT: ldr d0, [x0]
; CHECK-LE-SD-NEXT: ldr d1, [x1]
; CHECK-LE-SD-NEXT: add.2s v0, v0, v1
; CHECK-LE-SD-NEXT: ldr q1, [x2]
; CHECK-LE-SD-NEXT: ushll.2d v0, v0, #0
; CHECK-LE-SD-NEXT: add.4s v0, v0, v1
; CHECK-LE-SD-NEXT: str q0, [x2]
; CHECK-LE-SD-NEXT: ret
;
; CHECK-BE-LABEL: aext_of_concat:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ld1 { v0.2s }, [x0]
; CHECK-BE-NEXT: ld1 { v1.2s }, [x1]
; CHECK-BE-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-BE-NEXT: ld1 { v1.4s }, [x2]
; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v0.4s
; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x2]
; CHECK-BE-NEXT: ret
;
; CHECK-LE-GI-LABEL: aext_of_concat:
; CHECK-LE-GI: ; %bb.0:
; CHECK-LE-GI-NEXT: ldr d0, [x0]
; CHECK-LE-GI-NEXT: ldr d1, [x1]
; CHECK-LE-GI-NEXT: add.2s v0, v0, v1
; CHECK-LE-GI-NEXT: ldr q1, [x2]
; CHECK-LE-GI-NEXT: zip1.4s v0, v0, v0
; CHECK-LE-GI-NEXT: add.4s v0, v0, v1
; CHECK-LE-GI-NEXT: str q0, [x2]
; CHECK-LE-GI-NEXT: ret
%i0.a = load <2 x i32>, ptr %a
%i0.b = load <2 x i32>, ptr %b
%i0 = add <2 x i32> %i0.a, %i0.b
; Widen to four lanes; lanes 2 and 3 are left undefined.
%i1 = shufflevector <2 x i32> %i0, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Place i1[0] in lane 0 and i1[1] in lane 2; lanes 1 and 3 are undefined.
%i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
%i3 = load <4 x i32>, ptr %c
%i4 = add <4 x i32> %i2, %i3
store <4 x i32> %i4, ptr %c
ret void
}
; aext_of_concat_extrause: spread the low two lanes of a concatenated
; <4 x i32> into the even lanes of the result with the odd lanes undefined,
; while the concatenation itself has a second use: it is also stored to %e.
;
;   %a, %b : each is loaded as a <2 x i32>; the two vectors are added.
;   %c     : loaded as <4 x i32>, added to the shuffled value, stored back.
;   %d     : not referenced by the body.
;   %e     : receives the <4 x i32> concatenation of the sum with itself.
;
; Because %i1 is stored, all four of its lanes must be materialised. The
; expected output for every configuration below builds the full 128-bit
; concatenation (mov of d[0] into d[1]) and uses zip1 of the value with
; itself for the shuffled operand.
define void @aext_of_concat_extrause(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) nounwind {
; CHECK-LE-SD-LABEL: aext_of_concat_extrause:
; CHECK-LE-SD: ; %bb.0:
; CHECK-LE-SD-NEXT: ldr d0, [x1]
; CHECK-LE-SD-NEXT: ldr d1, [x0]
; CHECK-LE-SD-NEXT: add.2s v0, v1, v0
; CHECK-LE-SD-NEXT: mov.16b v1, v0
; CHECK-LE-SD-NEXT: mov.d v1[1], v0[0]
; CHECK-LE-SD-NEXT: zip1.4s v0, v0, v0
; CHECK-LE-SD-NEXT: str q1, [x4]
; CHECK-LE-SD-NEXT: ldr q1, [x2]
; CHECK-LE-SD-NEXT: add.4s v0, v0, v1
; CHECK-LE-SD-NEXT: str q0, [x2]
; CHECK-LE-SD-NEXT: ret
;
; CHECK-BE-LABEL: aext_of_concat_extrause:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ld1 { v0.2s }, [x1]
; CHECK-BE-NEXT: ld1 { v1.2s }, [x0]
; CHECK-BE-NEXT: add v0.2s, v1.2s, v0.2s
; CHECK-BE-NEXT: mov v1.16b, v0.16b
; CHECK-BE-NEXT: mov v1.d[1], v0.d[0]
; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v0.4s
; CHECK-BE-NEXT: st1 { v1.4s }, [x4]
; CHECK-BE-NEXT: ld1 { v1.4s }, [x2]
; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x2]
; CHECK-BE-NEXT: ret
;
; CHECK-LE-GI-LABEL: aext_of_concat_extrause:
; CHECK-LE-GI: ; %bb.0:
; CHECK-LE-GI-NEXT: ldr d0, [x0]
; CHECK-LE-GI-NEXT: ldr d1, [x1]
; CHECK-LE-GI-NEXT: add.2s v0, v0, v1
; CHECK-LE-GI-NEXT: mov.d v0[1], v0[0]
; CHECK-LE-GI-NEXT: zip1.4s v1, v0, v0
; CHECK-LE-GI-NEXT: str q0, [x4]
; CHECK-LE-GI-NEXT: ldr q0, [x2]
; CHECK-LE-GI-NEXT: add.4s v0, v1, v0
; CHECK-LE-GI-NEXT: str q0, [x2]
; CHECK-LE-GI-NEXT: ret
%i0.a = load <2 x i32>, ptr %a
%i0.b = load <2 x i32>, ptr %b
%i0 = add <2 x i32> %i0.a, %i0.b
; Concatenate the sum with itself: { i0[0], i0[1], i0[0], i0[1] }.
%i1 = shufflevector <2 x i32> %i0, <2 x i32> %i0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Extra use of the concatenation: every lane of %i1 is observable.
store <4 x i32> %i1, ptr %e
; Place i1[0] in lane 0 and i1[1] in lane 2; lanes 1 and 3 are undefined.
; The mask never selects indices 4..7, so the zeroinitializer operand
; contributes no lanes to the result.
%i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
%i3 = load <4 x i32>, ptr %c
%i4 = add <4 x i32> %i2, %i3
store <4 x i32> %i4, ptr %c
ret void
}