; blob: 730dfed5ff228e23cfd35d01781f8c1d7b085c89 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; Masked two-way deinterleaving load of i8 elements.
; %mask is interleaved with itself to build a <32 x i1> mask, which guards a
; masked load of <32 x i8> (alignment 1, poison passthru) from %p; the loaded
; vector is then split into two <16 x i8> results with deinterleave2.
; The autogenerated assertions below expect the 32 mask bits to be packed into
; w8, each bit to be tested with tbz/tbnz, and the matching byte to be loaded
; one lane at a time (ldr b1 for lane 0, ld1 lane loads otherwise) into v1/v2,
; which are finally combined with uzp1/uzp2.
define { <16 x i8>, <16 x i8> } @foo_ld2_v16i8(<16 x i1> %mask, ptr %p) {
; CHECK-LABEL: foo_ld2_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 v1.16b, v0.16b, v0.16b
; CHECK-NEXT: zip1 v0.16b, v0.16b, v0.16b
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: zip1 v1.16b, v1.16b, v2.16b
; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b
; CHECK-NEXT: addv h1, v1.8h
; CHECK-NEXT: addv h0, v0.8h
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: bfi w8, w9, #16, #16
; CHECK-NEXT: tbz w8, #0, .LBB0_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ldr b1, [x0]
; CHECK-NEXT: tbnz w8, #1, .LBB0_3
; CHECK-NEXT: b .LBB0_4
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: tbz w8, #1, .LBB0_4
; CHECK-NEXT: .LBB0_3: // %cond.load1
; CHECK-NEXT: add x9, x0, #1
; CHECK-NEXT: ld1 { v1.b }[1], [x9]
; CHECK-NEXT: .LBB0_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB0_20
; CHECK-NEXT: // %bb.5: // %else5
; CHECK-NEXT: tbnz w8, #3, .LBB0_21
; CHECK-NEXT: .LBB0_6: // %else8
; CHECK-NEXT: tbnz w8, #4, .LBB0_22
; CHECK-NEXT: .LBB0_7: // %else11
; CHECK-NEXT: tbnz w8, #5, .LBB0_23
; CHECK-NEXT: .LBB0_8: // %else14
; CHECK-NEXT: tbnz w8, #6, .LBB0_24
; CHECK-NEXT: .LBB0_9: // %else17
; CHECK-NEXT: tbnz w8, #7, .LBB0_25
; CHECK-NEXT: .LBB0_10: // %else20
; CHECK-NEXT: tbnz w8, #8, .LBB0_26
; CHECK-NEXT: .LBB0_11: // %else23
; CHECK-NEXT: tbnz w8, #9, .LBB0_27
; CHECK-NEXT: .LBB0_12: // %else26
; CHECK-NEXT: tbnz w8, #10, .LBB0_28
; CHECK-NEXT: .LBB0_13: // %else29
; CHECK-NEXT: tbnz w8, #11, .LBB0_29
; CHECK-NEXT: .LBB0_14: // %else32
; CHECK-NEXT: tbnz w8, #12, .LBB0_30
; CHECK-NEXT: .LBB0_15: // %else35
; CHECK-NEXT: tbnz w8, #13, .LBB0_31
; CHECK-NEXT: .LBB0_16: // %else38
; CHECK-NEXT: tbnz w8, #14, .LBB0_32
; CHECK-NEXT: .LBB0_17: // %else41
; CHECK-NEXT: tbnz w8, #15, .LBB0_33
; CHECK-NEXT: .LBB0_18: // %else44
; CHECK-NEXT: tbz w8, #16, .LBB0_34
; CHECK-NEXT: .LBB0_19: // %cond.load46
; CHECK-NEXT: add x9, x0, #16
; CHECK-NEXT: ld1 { v2.b }[0], [x9]
; CHECK-NEXT: tbnz w8, #17, .LBB0_35
; CHECK-NEXT: b .LBB0_36
; CHECK-NEXT: .LBB0_20: // %cond.load4
; CHECK-NEXT: add x9, x0, #2
; CHECK-NEXT: ld1 { v1.b }[2], [x9]
; CHECK-NEXT: tbz w8, #3, .LBB0_6
; CHECK-NEXT: .LBB0_21: // %cond.load7
; CHECK-NEXT: add x9, x0, #3
; CHECK-NEXT: ld1 { v1.b }[3], [x9]
; CHECK-NEXT: tbz w8, #4, .LBB0_7
; CHECK-NEXT: .LBB0_22: // %cond.load10
; CHECK-NEXT: add x9, x0, #4
; CHECK-NEXT: ld1 { v1.b }[4], [x9]
; CHECK-NEXT: tbz w8, #5, .LBB0_8
; CHECK-NEXT: .LBB0_23: // %cond.load13
; CHECK-NEXT: add x9, x0, #5
; CHECK-NEXT: ld1 { v1.b }[5], [x9]
; CHECK-NEXT: tbz w8, #6, .LBB0_9
; CHECK-NEXT: .LBB0_24: // %cond.load16
; CHECK-NEXT: add x9, x0, #6
; CHECK-NEXT: ld1 { v1.b }[6], [x9]
; CHECK-NEXT: tbz w8, #7, .LBB0_10
; CHECK-NEXT: .LBB0_25: // %cond.load19
; CHECK-NEXT: add x9, x0, #7
; CHECK-NEXT: ld1 { v1.b }[7], [x9]
; CHECK-NEXT: tbz w8, #8, .LBB0_11
; CHECK-NEXT: .LBB0_26: // %cond.load22
; CHECK-NEXT: add x9, x0, #8
; CHECK-NEXT: ld1 { v1.b }[8], [x9]
; CHECK-NEXT: tbz w8, #9, .LBB0_12
; CHECK-NEXT: .LBB0_27: // %cond.load25
; CHECK-NEXT: add x9, x0, #9
; CHECK-NEXT: ld1 { v1.b }[9], [x9]
; CHECK-NEXT: tbz w8, #10, .LBB0_13
; CHECK-NEXT: .LBB0_28: // %cond.load28
; CHECK-NEXT: add x9, x0, #10
; CHECK-NEXT: ld1 { v1.b }[10], [x9]
; CHECK-NEXT: tbz w8, #11, .LBB0_14
; CHECK-NEXT: .LBB0_29: // %cond.load31
; CHECK-NEXT: add x9, x0, #11
; CHECK-NEXT: ld1 { v1.b }[11], [x9]
; CHECK-NEXT: tbz w8, #12, .LBB0_15
; CHECK-NEXT: .LBB0_30: // %cond.load34
; CHECK-NEXT: add x9, x0, #12
; CHECK-NEXT: ld1 { v1.b }[12], [x9]
; CHECK-NEXT: tbz w8, #13, .LBB0_16
; CHECK-NEXT: .LBB0_31: // %cond.load37
; CHECK-NEXT: add x9, x0, #13
; CHECK-NEXT: ld1 { v1.b }[13], [x9]
; CHECK-NEXT: tbz w8, #14, .LBB0_17
; CHECK-NEXT: .LBB0_32: // %cond.load40
; CHECK-NEXT: add x9, x0, #14
; CHECK-NEXT: ld1 { v1.b }[14], [x9]
; CHECK-NEXT: tbz w8, #15, .LBB0_18
; CHECK-NEXT: .LBB0_33: // %cond.load43
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: ld1 { v1.b }[15], [x9]
; CHECK-NEXT: tbnz w8, #16, .LBB0_19
; CHECK-NEXT: .LBB0_34:
; CHECK-NEXT: // implicit-def: $q2
; CHECK-NEXT: tbz w8, #17, .LBB0_36
; CHECK-NEXT: .LBB0_35: // %cond.load49
; CHECK-NEXT: add x9, x0, #17
; CHECK-NEXT: ld1 { v2.b }[1], [x9]
; CHECK-NEXT: .LBB0_36: // %else50
; CHECK-NEXT: tbnz w8, #18, .LBB0_52
; CHECK-NEXT: // %bb.37: // %else53
; CHECK-NEXT: tbnz w8, #19, .LBB0_53
; CHECK-NEXT: .LBB0_38: // %else56
; CHECK-NEXT: tbnz w8, #20, .LBB0_54
; CHECK-NEXT: .LBB0_39: // %else59
; CHECK-NEXT: tbnz w8, #21, .LBB0_55
; CHECK-NEXT: .LBB0_40: // %else62
; CHECK-NEXT: tbnz w8, #22, .LBB0_56
; CHECK-NEXT: .LBB0_41: // %else65
; CHECK-NEXT: tbnz w8, #23, .LBB0_57
; CHECK-NEXT: .LBB0_42: // %else68
; CHECK-NEXT: tbnz w8, #24, .LBB0_58
; CHECK-NEXT: .LBB0_43: // %else71
; CHECK-NEXT: tbnz w8, #25, .LBB0_59
; CHECK-NEXT: .LBB0_44: // %else74
; CHECK-NEXT: tbnz w8, #26, .LBB0_60
; CHECK-NEXT: .LBB0_45: // %else77
; CHECK-NEXT: tbnz w8, #27, .LBB0_61
; CHECK-NEXT: .LBB0_46: // %else80
; CHECK-NEXT: tbnz w8, #28, .LBB0_62
; CHECK-NEXT: .LBB0_47: // %else83
; CHECK-NEXT: tbnz w8, #29, .LBB0_63
; CHECK-NEXT: .LBB0_48: // %else86
; CHECK-NEXT: tbnz w8, #30, .LBB0_64
; CHECK-NEXT: .LBB0_49: // %else89
; CHECK-NEXT: tbz w8, #31, .LBB0_51
; CHECK-NEXT: .LBB0_50: // %cond.load91
; CHECK-NEXT: add x8, x0, #31
; CHECK-NEXT: ld1 { v2.b }[15], [x8]
; CHECK-NEXT: .LBB0_51: // %else92
; CHECK-NEXT: uzp1 v0.16b, v1.16b, v2.16b
; CHECK-NEXT: uzp2 v1.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_52: // %cond.load52
; CHECK-NEXT: add x9, x0, #18
; CHECK-NEXT: ld1 { v2.b }[2], [x9]
; CHECK-NEXT: tbz w8, #19, .LBB0_38
; CHECK-NEXT: .LBB0_53: // %cond.load55
; CHECK-NEXT: add x9, x0, #19
; CHECK-NEXT: ld1 { v2.b }[3], [x9]
; CHECK-NEXT: tbz w8, #20, .LBB0_39
; CHECK-NEXT: .LBB0_54: // %cond.load58
; CHECK-NEXT: add x9, x0, #20
; CHECK-NEXT: ld1 { v2.b }[4], [x9]
; CHECK-NEXT: tbz w8, #21, .LBB0_40
; CHECK-NEXT: .LBB0_55: // %cond.load61
; CHECK-NEXT: add x9, x0, #21
; CHECK-NEXT: ld1 { v2.b }[5], [x9]
; CHECK-NEXT: tbz w8, #22, .LBB0_41
; CHECK-NEXT: .LBB0_56: // %cond.load64
; CHECK-NEXT: add x9, x0, #22
; CHECK-NEXT: ld1 { v2.b }[6], [x9]
; CHECK-NEXT: tbz w8, #23, .LBB0_42
; CHECK-NEXT: .LBB0_57: // %cond.load67
; CHECK-NEXT: add x9, x0, #23
; CHECK-NEXT: ld1 { v2.b }[7], [x9]
; CHECK-NEXT: tbz w8, #24, .LBB0_43
; CHECK-NEXT: .LBB0_58: // %cond.load70
; CHECK-NEXT: add x9, x0, #24
; CHECK-NEXT: ld1 { v2.b }[8], [x9]
; CHECK-NEXT: tbz w8, #25, .LBB0_44
; CHECK-NEXT: .LBB0_59: // %cond.load73
; CHECK-NEXT: add x9, x0, #25
; CHECK-NEXT: ld1 { v2.b }[9], [x9]
; CHECK-NEXT: tbz w8, #26, .LBB0_45
; CHECK-NEXT: .LBB0_60: // %cond.load76
; CHECK-NEXT: add x9, x0, #26
; CHECK-NEXT: ld1 { v2.b }[10], [x9]
; CHECK-NEXT: tbz w8, #27, .LBB0_46
; CHECK-NEXT: .LBB0_61: // %cond.load79
; CHECK-NEXT: add x9, x0, #27
; CHECK-NEXT: ld1 { v2.b }[11], [x9]
; CHECK-NEXT: tbz w8, #28, .LBB0_47
; CHECK-NEXT: .LBB0_62: // %cond.load82
; CHECK-NEXT: add x9, x0, #28
; CHECK-NEXT: ld1 { v2.b }[12], [x9]
; CHECK-NEXT: tbz w8, #29, .LBB0_48
; CHECK-NEXT: .LBB0_63: // %cond.load85
; CHECK-NEXT: add x9, x0, #29
; CHECK-NEXT: ld1 { v2.b }[13], [x9]
; CHECK-NEXT: tbz w8, #30, .LBB0_49
; CHECK-NEXT: .LBB0_64: // %cond.load88
; CHECK-NEXT: add x9, x0, #30
; CHECK-NEXT: ld1 { v2.b }[14], [x9]
; CHECK-NEXT: tbnz w8, #31, .LBB0_50
; CHECK-NEXT: b .LBB0_51
%interleaved.mask = call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %mask, <16 x i1> %mask)
%wide.masked.vec = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr %p, i32 1, <32 x i1> %interleaved.mask, <32 x i8> poison)
%deinterleaved.vec = call { <16 x i8>, <16 x i8> } @llvm.vector.deinterleave2.v32i8(<32 x i8> %wide.masked.vec)
ret { <16 x i8>, <16 x i8> } %deinterleaved.vec
}
; Masked two-way deinterleaving load of i16 elements.
; %mask is interleaved with itself to build a <16 x i1> mask, which guards a
; masked load of <16 x i16> (alignment 2, poison passthru) from %p; the loaded
; vector is then split into two <8 x i16> results with deinterleave2.
; The autogenerated assertions below expect the 16 mask bits to be packed into
; w8, each bit to be tested with tbz/tbnz, and the matching halfword to be
; loaded one lane at a time (ldr h1 for lane 0, ld1 lane loads otherwise) into
; v1/v2, which are finally combined with uzp1/uzp2.
define { <8 x i16>, <8 x i16> } @foo_ld2_v8i16(<8 x i1> %mask, ptr %p) {
; CHECK-LABEL: foo_ld2_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: zip1 v0.16b, v0.16b, v0.16b
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: zip1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT: addv h0, v0.8h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tbz w8, #0, .LBB1_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ldr h1, [x0]
; CHECK-NEXT: tbnz w8, #1, .LBB1_3
; CHECK-NEXT: b .LBB1_4
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: tbz w8, #1, .LBB1_4
; CHECK-NEXT: .LBB1_3: // %cond.load1
; CHECK-NEXT: add x9, x0, #2
; CHECK-NEXT: ld1 { v1.h }[1], [x9]
; CHECK-NEXT: .LBB1_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB1_12
; CHECK-NEXT: // %bb.5: // %else5
; CHECK-NEXT: tbnz w8, #3, .LBB1_13
; CHECK-NEXT: .LBB1_6: // %else8
; CHECK-NEXT: tbnz w8, #4, .LBB1_14
; CHECK-NEXT: .LBB1_7: // %else11
; CHECK-NEXT: tbnz w8, #5, .LBB1_15
; CHECK-NEXT: .LBB1_8: // %else14
; CHECK-NEXT: tbnz w8, #6, .LBB1_16
; CHECK-NEXT: .LBB1_9: // %else17
; CHECK-NEXT: tbnz w8, #7, .LBB1_17
; CHECK-NEXT: .LBB1_10: // %else20
; CHECK-NEXT: tbz w8, #8, .LBB1_18
; CHECK-NEXT: .LBB1_11: // %cond.load22
; CHECK-NEXT: add x9, x0, #16
; CHECK-NEXT: ld1 { v2.h }[0], [x9]
; CHECK-NEXT: tbnz w8, #9, .LBB1_19
; CHECK-NEXT: b .LBB1_20
; CHECK-NEXT: .LBB1_12: // %cond.load4
; CHECK-NEXT: add x9, x0, #4
; CHECK-NEXT: ld1 { v1.h }[2], [x9]
; CHECK-NEXT: tbz w8, #3, .LBB1_6
; CHECK-NEXT: .LBB1_13: // %cond.load7
; CHECK-NEXT: add x9, x0, #6
; CHECK-NEXT: ld1 { v1.h }[3], [x9]
; CHECK-NEXT: tbz w8, #4, .LBB1_7
; CHECK-NEXT: .LBB1_14: // %cond.load10
; CHECK-NEXT: add x9, x0, #8
; CHECK-NEXT: ld1 { v1.h }[4], [x9]
; CHECK-NEXT: tbz w8, #5, .LBB1_8
; CHECK-NEXT: .LBB1_15: // %cond.load13
; CHECK-NEXT: add x9, x0, #10
; CHECK-NEXT: ld1 { v1.h }[5], [x9]
; CHECK-NEXT: tbz w8, #6, .LBB1_9
; CHECK-NEXT: .LBB1_16: // %cond.load16
; CHECK-NEXT: add x9, x0, #12
; CHECK-NEXT: ld1 { v1.h }[6], [x9]
; CHECK-NEXT: tbz w8, #7, .LBB1_10
; CHECK-NEXT: .LBB1_17: // %cond.load19
; CHECK-NEXT: add x9, x0, #14
; CHECK-NEXT: ld1 { v1.h }[7], [x9]
; CHECK-NEXT: tbnz w8, #8, .LBB1_11
; CHECK-NEXT: .LBB1_18:
; CHECK-NEXT: // implicit-def: $q2
; CHECK-NEXT: tbz w8, #9, .LBB1_20
; CHECK-NEXT: .LBB1_19: // %cond.load25
; CHECK-NEXT: add x9, x0, #18
; CHECK-NEXT: ld1 { v2.h }[1], [x9]
; CHECK-NEXT: .LBB1_20: // %else26
; CHECK-NEXT: tbnz w8, #10, .LBB1_28
; CHECK-NEXT: // %bb.21: // %else29
; CHECK-NEXT: tbnz w8, #11, .LBB1_29
; CHECK-NEXT: .LBB1_22: // %else32
; CHECK-NEXT: tbnz w8, #12, .LBB1_30
; CHECK-NEXT: .LBB1_23: // %else35
; CHECK-NEXT: tbnz w8, #13, .LBB1_31
; CHECK-NEXT: .LBB1_24: // %else38
; CHECK-NEXT: tbnz w8, #14, .LBB1_32
; CHECK-NEXT: .LBB1_25: // %else41
; CHECK-NEXT: tbz w8, #15, .LBB1_27
; CHECK-NEXT: .LBB1_26: // %cond.load43
; CHECK-NEXT: add x8, x0, #30
; CHECK-NEXT: ld1 { v2.h }[7], [x8]
; CHECK-NEXT: .LBB1_27: // %else44
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_28: // %cond.load28
; CHECK-NEXT: add x9, x0, #20
; CHECK-NEXT: ld1 { v2.h }[2], [x9]
; CHECK-NEXT: tbz w8, #11, .LBB1_22
; CHECK-NEXT: .LBB1_29: // %cond.load31
; CHECK-NEXT: add x9, x0, #22
; CHECK-NEXT: ld1 { v2.h }[3], [x9]
; CHECK-NEXT: tbz w8, #12, .LBB1_23
; CHECK-NEXT: .LBB1_30: // %cond.load34
; CHECK-NEXT: add x9, x0, #24
; CHECK-NEXT: ld1 { v2.h }[4], [x9]
; CHECK-NEXT: tbz w8, #13, .LBB1_24
; CHECK-NEXT: .LBB1_31: // %cond.load37
; CHECK-NEXT: add x9, x0, #26
; CHECK-NEXT: ld1 { v2.h }[5], [x9]
; CHECK-NEXT: tbz w8, #14, .LBB1_25
; CHECK-NEXT: .LBB1_32: // %cond.load40
; CHECK-NEXT: add x9, x0, #28
; CHECK-NEXT: ld1 { v2.h }[6], [x9]
; CHECK-NEXT: tbnz w8, #15, .LBB1_26
; CHECK-NEXT: b .LBB1_27
%interleaved.mask = call <16 x i1> @llvm.vector.interleave2.v16i1(<8 x i1> %mask, <8 x i1> %mask)
%wide.masked.vec = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr %p, i32 2, <16 x i1> %interleaved.mask, <16 x i16> poison)
%deinterleaved.vec = call { <8 x i16>, <8 x i16> } @llvm.vector.deinterleave2.v16i16(<16 x i16> %wide.masked.vec)
ret { <8 x i16>, <8 x i16> } %deinterleaved.vec
}
; Masked two-way deinterleaving load of float elements.
; %mask is interleaved with itself to build an <8 x i1> mask, which guards a
; masked load of <8 x float> (alignment 4, poison passthru) from %p; the loaded
; vector is then split into two <4 x float> results with deinterleave2.
; The autogenerated assertions below expect the 8 mask bits to be packed into
; w8, each bit to be tested with tbz/tbnz, and the matching word to be loaded
; one lane at a time (ldr s1 for lane 0, ld1 lane loads otherwise) into v1/v2,
; which are finally combined with uzp1/uzp2.
; The deinterleave2 overload suffix is v8f32 so that it names the <8 x float>
; operand type, in line with the suffixes used by the other tests in this file.
define { <4 x float>, <4 x float> } @foo_ld2_v4f32(<4 x i1> %mask, ptr %p) {
; CHECK-LABEL: foo_ld2_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-NEXT: shl v0.8b, v0.8b, #7
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: addv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tbz w8, #0, .LBB2_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: tbnz w8, #1, .LBB2_3
; CHECK-NEXT: b .LBB2_4
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: tbz w8, #1, .LBB2_4
; CHECK-NEXT: .LBB2_3: // %cond.load1
; CHECK-NEXT: add x9, x0, #4
; CHECK-NEXT: ld1 { v1.s }[1], [x9]
; CHECK-NEXT: .LBB2_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB2_8
; CHECK-NEXT: // %bb.5: // %else5
; CHECK-NEXT: tbnz w8, #3, .LBB2_9
; CHECK-NEXT: .LBB2_6: // %else8
; CHECK-NEXT: tbz w8, #4, .LBB2_10
; CHECK-NEXT: .LBB2_7: // %cond.load10
; CHECK-NEXT: add x9, x0, #16
; CHECK-NEXT: ld1 { v2.s }[0], [x9]
; CHECK-NEXT: tbnz w8, #5, .LBB2_11
; CHECK-NEXT: b .LBB2_12
; CHECK-NEXT: .LBB2_8: // %cond.load4
; CHECK-NEXT: add x9, x0, #8
; CHECK-NEXT: ld1 { v1.s }[2], [x9]
; CHECK-NEXT: tbz w8, #3, .LBB2_6
; CHECK-NEXT: .LBB2_9: // %cond.load7
; CHECK-NEXT: add x9, x0, #12
; CHECK-NEXT: ld1 { v1.s }[3], [x9]
; CHECK-NEXT: tbnz w8, #4, .LBB2_7
; CHECK-NEXT: .LBB2_10:
; CHECK-NEXT: // implicit-def: $q2
; CHECK-NEXT: tbz w8, #5, .LBB2_12
; CHECK-NEXT: .LBB2_11: // %cond.load13
; CHECK-NEXT: add x9, x0, #20
; CHECK-NEXT: ld1 { v2.s }[1], [x9]
; CHECK-NEXT: .LBB2_12: // %else14
; CHECK-NEXT: tbnz w8, #6, .LBB2_16
; CHECK-NEXT: // %bb.13: // %else17
; CHECK-NEXT: tbz w8, #7, .LBB2_15
; CHECK-NEXT: .LBB2_14: // %cond.load19
; CHECK-NEXT: add x8, x0, #28
; CHECK-NEXT: ld1 { v2.s }[3], [x8]
; CHECK-NEXT: .LBB2_15: // %else20
; CHECK-NEXT: uzp1 v0.4s, v1.4s, v2.4s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_16: // %cond.load16
; CHECK-NEXT: add x9, x0, #24
; CHECK-NEXT: ld1 { v2.s }[2], [x9]
; CHECK-NEXT: tbnz w8, #7, .LBB2_14
; CHECK-NEXT: b .LBB2_15
%interleaved.mask = call <8 x i1> @llvm.vector.interleave2.v8i1(<4 x i1> %mask, <4 x i1> %mask)
%wide.masked.vec = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %p, i32 4, <8 x i1> %interleaved.mask, <8 x float> poison)
%deinterleaved.vec = call { <4 x float>, <4 x float> } @llvm.vector.deinterleave2.v8f32(<8 x float> %wide.masked.vec)
ret { <4 x float>, <4 x float> } %deinterleaved.vec
}
; Masked two-way deinterleaving load of double elements.
; %mask is interleaved with itself to build a <4 x i1> mask, which guards a
; masked load of <4 x double> (alignment 8, poison passthru) from %p; the
; loaded vector is then split into two <2 x double> results with deinterleave2.
; The autogenerated assertions below expect the 4 mask bits to be packed into
; w8, each bit to be tested with tbz/tbnz, and the matching doubleword to be
; loaded one lane at a time (ldr d1 for lane 0, ld1 lane loads otherwise) into
; v1/v2, which are finally combined with zip1/zip2.
define { <2 x double>, <2 x double> } @foo_ld2_v2f64(<2 x i1> %mask, ptr %p) {
; CHECK-LABEL: foo_ld2_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: zip1 v0.4h, v0.4h, v0.4h
; CHECK-NEXT: shl v0.4h, v0.4h, #15
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: addv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tbz w8, #0, .LBB3_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: tbnz w8, #1, .LBB3_3
; CHECK-NEXT: b .LBB3_4
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: tbz w8, #1, .LBB3_4
; CHECK-NEXT: .LBB3_3: // %cond.load1
; CHECK-NEXT: add x9, x0, #8
; CHECK-NEXT: ld1 { v1.d }[1], [x9]
; CHECK-NEXT: .LBB3_4: // %else2
; CHECK-NEXT: tbz w8, #2, .LBB3_6
; CHECK-NEXT: // %bb.5: // %cond.load4
; CHECK-NEXT: add x9, x0, #16
; CHECK-NEXT: ld1 { v2.d }[0], [x9]
; CHECK-NEXT: tbnz w8, #3, .LBB3_7
; CHECK-NEXT: b .LBB3_8
; CHECK-NEXT: .LBB3_6:
; CHECK-NEXT: // implicit-def: $q2
; CHECK-NEXT: tbz w8, #3, .LBB3_8
; CHECK-NEXT: .LBB3_7: // %cond.load7
; CHECK-NEXT: add x8, x0, #24
; CHECK-NEXT: ld1 { v2.d }[1], [x8]
; CHECK-NEXT: .LBB3_8: // %else8
; CHECK-NEXT: zip1 v0.2d, v1.2d, v2.2d
; CHECK-NEXT: zip2 v1.2d, v1.2d, v2.2d
; CHECK-NEXT: ret
%interleaved.mask = call <4 x i1> @llvm.vector.interleave2.v4i1(<2 x i1> %mask, <2 x i1> %mask)
%wide.masked.vec = call <4 x double> @llvm.masked.load.v4f64.p0(ptr %p, i32 8, <4 x i1> %interleaved.mask, <4 x double> poison)
%deinterleaved.vec = call { <2 x double>, <2 x double> } @llvm.vector.deinterleave2.v4f64(<4 x double> %wide.masked.vec)
ret { <2 x double>, <2 x double> } %deinterleaved.vec
}