| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s |
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s |
| |
| ; foo_ld2_v16i8: masked load of <32 x i8> from %p (align 1, poison passthru) |
| ; whose mask is %mask interleaved with itself, followed by a 2-way |
| ; deinterleave of the loaded vector into two <16 x i8> results. |
| ; |
| ; The autogenerated CHECK lines record a per-lane expansion of the load: |
| ;  - the 32 mask bits are packed into w8 (two addv halves joined by bfi); |
| ;  - each bit is tested with tbz/tbnz, and a set bit fills one byte lane via |
| ;    ldr b / ld1 { vN.b }[i] at byte offsets 0..31 from x0 (v1 holds lanes |
| ;    0..15, v2 holds lanes 16..31); |
| ;  - uzp1/uzp2 split v1:v2 into the even and odd elements for the return. |
| ; Both RUN lines share the CHECK prefix, so the same output is expected with |
| ; and without +sve. |
| ; NOTE(review): the "ld2" in the name suggests a structured load is the |
| ; intended end state - confirm before treating this expansion as final. |
| ; Do not hand-edit the CHECK lines; regenerate them with |
| ; utils/update_llc_test_checks.py. |
| define { <16 x i8>, <16 x i8> } @foo_ld2_v16i8(<16 x i1> %mask, ptr %p) { |
| ; CHECK-LABEL: foo_ld2_v16i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip2 v1.16b, v0.16b, v0.16b |
| ; CHECK-NEXT: zip1 v0.16b, v0.16b, v0.16b |
| ; CHECK-NEXT: adrp x8, .LCPI0_0 |
| ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_0] |
| ; CHECK-NEXT: shl v1.16b, v1.16b, #7 |
| ; CHECK-NEXT: shl v0.16b, v0.16b, #7 |
| ; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 |
| ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 |
| ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b |
| ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8 |
| ; CHECK-NEXT: zip1 v1.16b, v1.16b, v2.16b |
| ; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b |
| ; CHECK-NEXT: addv h1, v1.8h |
| ; CHECK-NEXT: addv h0, v0.8h |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: bfi w8, w9, #16, #16 |
| ; CHECK-NEXT: tbz w8, #0, .LBB0_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ldr b1, [x0] |
| ; CHECK-NEXT: tbnz w8, #1, .LBB0_3 |
| ; CHECK-NEXT: b .LBB0_4 |
| ; CHECK-NEXT: .LBB0_2: |
| ; CHECK-NEXT: // implicit-def: $q1 |
| ; CHECK-NEXT: tbz w8, #1, .LBB0_4 |
| ; CHECK-NEXT: .LBB0_3: // %cond.load1 |
| ; CHECK-NEXT: add x9, x0, #1 |
| ; CHECK-NEXT: ld1 { v1.b }[1], [x9] |
| ; CHECK-NEXT: .LBB0_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB0_20 |
| ; CHECK-NEXT: // %bb.5: // %else5 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB0_21 |
| ; CHECK-NEXT: .LBB0_6: // %else8 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB0_22 |
| ; CHECK-NEXT: .LBB0_7: // %else11 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB0_23 |
| ; CHECK-NEXT: .LBB0_8: // %else14 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB0_24 |
| ; CHECK-NEXT: .LBB0_9: // %else17 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB0_25 |
| ; CHECK-NEXT: .LBB0_10: // %else20 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB0_26 |
| ; CHECK-NEXT: .LBB0_11: // %else23 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB0_27 |
| ; CHECK-NEXT: .LBB0_12: // %else26 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB0_28 |
| ; CHECK-NEXT: .LBB0_13: // %else29 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB0_29 |
| ; CHECK-NEXT: .LBB0_14: // %else32 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB0_30 |
| ; CHECK-NEXT: .LBB0_15: // %else35 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB0_31 |
| ; CHECK-NEXT: .LBB0_16: // %else38 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB0_32 |
| ; CHECK-NEXT: .LBB0_17: // %else41 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB0_33 |
| ; CHECK-NEXT: .LBB0_18: // %else44 |
| ; CHECK-NEXT: tbz w8, #16, .LBB0_34 |
| ; CHECK-NEXT: .LBB0_19: // %cond.load46 |
| ; CHECK-NEXT: add x9, x0, #16 |
| ; CHECK-NEXT: ld1 { v2.b }[0], [x9] |
| ; CHECK-NEXT: tbnz w8, #17, .LBB0_35 |
| ; CHECK-NEXT: b .LBB0_36 |
| ; CHECK-NEXT: .LBB0_20: // %cond.load4 |
| ; CHECK-NEXT: add x9, x0, #2 |
| ; CHECK-NEXT: ld1 { v1.b }[2], [x9] |
| ; CHECK-NEXT: tbz w8, #3, .LBB0_6 |
| ; CHECK-NEXT: .LBB0_21: // %cond.load7 |
| ; CHECK-NEXT: add x9, x0, #3 |
| ; CHECK-NEXT: ld1 { v1.b }[3], [x9] |
| ; CHECK-NEXT: tbz w8, #4, .LBB0_7 |
| ; CHECK-NEXT: .LBB0_22: // %cond.load10 |
| ; CHECK-NEXT: add x9, x0, #4 |
| ; CHECK-NEXT: ld1 { v1.b }[4], [x9] |
| ; CHECK-NEXT: tbz w8, #5, .LBB0_8 |
| ; CHECK-NEXT: .LBB0_23: // %cond.load13 |
| ; CHECK-NEXT: add x9, x0, #5 |
| ; CHECK-NEXT: ld1 { v1.b }[5], [x9] |
| ; CHECK-NEXT: tbz w8, #6, .LBB0_9 |
| ; CHECK-NEXT: .LBB0_24: // %cond.load16 |
| ; CHECK-NEXT: add x9, x0, #6 |
| ; CHECK-NEXT: ld1 { v1.b }[6], [x9] |
| ; CHECK-NEXT: tbz w8, #7, .LBB0_10 |
| ; CHECK-NEXT: .LBB0_25: // %cond.load19 |
| ; CHECK-NEXT: add x9, x0, #7 |
| ; CHECK-NEXT: ld1 { v1.b }[7], [x9] |
| ; CHECK-NEXT: tbz w8, #8, .LBB0_11 |
| ; CHECK-NEXT: .LBB0_26: // %cond.load22 |
| ; CHECK-NEXT: add x9, x0, #8 |
| ; CHECK-NEXT: ld1 { v1.b }[8], [x9] |
| ; CHECK-NEXT: tbz w8, #9, .LBB0_12 |
| ; CHECK-NEXT: .LBB0_27: // %cond.load25 |
| ; CHECK-NEXT: add x9, x0, #9 |
| ; CHECK-NEXT: ld1 { v1.b }[9], [x9] |
| ; CHECK-NEXT: tbz w8, #10, .LBB0_13 |
| ; CHECK-NEXT: .LBB0_28: // %cond.load28 |
| ; CHECK-NEXT: add x9, x0, #10 |
| ; CHECK-NEXT: ld1 { v1.b }[10], [x9] |
| ; CHECK-NEXT: tbz w8, #11, .LBB0_14 |
| ; CHECK-NEXT: .LBB0_29: // %cond.load31 |
| ; CHECK-NEXT: add x9, x0, #11 |
| ; CHECK-NEXT: ld1 { v1.b }[11], [x9] |
| ; CHECK-NEXT: tbz w8, #12, .LBB0_15 |
| ; CHECK-NEXT: .LBB0_30: // %cond.load34 |
| ; CHECK-NEXT: add x9, x0, #12 |
| ; CHECK-NEXT: ld1 { v1.b }[12], [x9] |
| ; CHECK-NEXT: tbz w8, #13, .LBB0_16 |
| ; CHECK-NEXT: .LBB0_31: // %cond.load37 |
| ; CHECK-NEXT: add x9, x0, #13 |
| ; CHECK-NEXT: ld1 { v1.b }[13], [x9] |
| ; CHECK-NEXT: tbz w8, #14, .LBB0_17 |
| ; CHECK-NEXT: .LBB0_32: // %cond.load40 |
| ; CHECK-NEXT: add x9, x0, #14 |
| ; CHECK-NEXT: ld1 { v1.b }[14], [x9] |
| ; CHECK-NEXT: tbz w8, #15, .LBB0_18 |
| ; CHECK-NEXT: .LBB0_33: // %cond.load43 |
| ; CHECK-NEXT: add x9, x0, #15 |
| ; CHECK-NEXT: ld1 { v1.b }[15], [x9] |
| ; CHECK-NEXT: tbnz w8, #16, .LBB0_19 |
| ; CHECK-NEXT: .LBB0_34: |
| ; CHECK-NEXT: // implicit-def: $q2 |
| ; CHECK-NEXT: tbz w8, #17, .LBB0_36 |
| ; CHECK-NEXT: .LBB0_35: // %cond.load49 |
| ; CHECK-NEXT: add x9, x0, #17 |
| ; CHECK-NEXT: ld1 { v2.b }[1], [x9] |
| ; CHECK-NEXT: .LBB0_36: // %else50 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB0_52 |
| ; CHECK-NEXT: // %bb.37: // %else53 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB0_53 |
| ; CHECK-NEXT: .LBB0_38: // %else56 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB0_54 |
| ; CHECK-NEXT: .LBB0_39: // %else59 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB0_55 |
| ; CHECK-NEXT: .LBB0_40: // %else62 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB0_56 |
| ; CHECK-NEXT: .LBB0_41: // %else65 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB0_57 |
| ; CHECK-NEXT: .LBB0_42: // %else68 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB0_58 |
| ; CHECK-NEXT: .LBB0_43: // %else71 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB0_59 |
| ; CHECK-NEXT: .LBB0_44: // %else74 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB0_60 |
| ; CHECK-NEXT: .LBB0_45: // %else77 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB0_61 |
| ; CHECK-NEXT: .LBB0_46: // %else80 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB0_62 |
| ; CHECK-NEXT: .LBB0_47: // %else83 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB0_63 |
| ; CHECK-NEXT: .LBB0_48: // %else86 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB0_64 |
| ; CHECK-NEXT: .LBB0_49: // %else89 |
| ; CHECK-NEXT: tbz w8, #31, .LBB0_51 |
| ; CHECK-NEXT: .LBB0_50: // %cond.load91 |
| ; CHECK-NEXT: add x8, x0, #31 |
| ; CHECK-NEXT: ld1 { v2.b }[15], [x8] |
| ; CHECK-NEXT: .LBB0_51: // %else92 |
| ; CHECK-NEXT: uzp1 v0.16b, v1.16b, v2.16b |
| ; CHECK-NEXT: uzp2 v1.16b, v1.16b, v2.16b |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB0_52: // %cond.load52 |
| ; CHECK-NEXT: add x9, x0, #18 |
| ; CHECK-NEXT: ld1 { v2.b }[2], [x9] |
| ; CHECK-NEXT: tbz w8, #19, .LBB0_38 |
| ; CHECK-NEXT: .LBB0_53: // %cond.load55 |
| ; CHECK-NEXT: add x9, x0, #19 |
| ; CHECK-NEXT: ld1 { v2.b }[3], [x9] |
| ; CHECK-NEXT: tbz w8, #20, .LBB0_39 |
| ; CHECK-NEXT: .LBB0_54: // %cond.load58 |
| ; CHECK-NEXT: add x9, x0, #20 |
| ; CHECK-NEXT: ld1 { v2.b }[4], [x9] |
| ; CHECK-NEXT: tbz w8, #21, .LBB0_40 |
| ; CHECK-NEXT: .LBB0_55: // %cond.load61 |
| ; CHECK-NEXT: add x9, x0, #21 |
| ; CHECK-NEXT: ld1 { v2.b }[5], [x9] |
| ; CHECK-NEXT: tbz w8, #22, .LBB0_41 |
| ; CHECK-NEXT: .LBB0_56: // %cond.load64 |
| ; CHECK-NEXT: add x9, x0, #22 |
| ; CHECK-NEXT: ld1 { v2.b }[6], [x9] |
| ; CHECK-NEXT: tbz w8, #23, .LBB0_42 |
| ; CHECK-NEXT: .LBB0_57: // %cond.load67 |
| ; CHECK-NEXT: add x9, x0, #23 |
| ; CHECK-NEXT: ld1 { v2.b }[7], [x9] |
| ; CHECK-NEXT: tbz w8, #24, .LBB0_43 |
| ; CHECK-NEXT: .LBB0_58: // %cond.load70 |
| ; CHECK-NEXT: add x9, x0, #24 |
| ; CHECK-NEXT: ld1 { v2.b }[8], [x9] |
| ; CHECK-NEXT: tbz w8, #25, .LBB0_44 |
| ; CHECK-NEXT: .LBB0_59: // %cond.load73 |
| ; CHECK-NEXT: add x9, x0, #25 |
| ; CHECK-NEXT: ld1 { v2.b }[9], [x9] |
| ; CHECK-NEXT: tbz w8, #26, .LBB0_45 |
| ; CHECK-NEXT: .LBB0_60: // %cond.load76 |
| ; CHECK-NEXT: add x9, x0, #26 |
| ; CHECK-NEXT: ld1 { v2.b }[10], [x9] |
| ; CHECK-NEXT: tbz w8, #27, .LBB0_46 |
| ; CHECK-NEXT: .LBB0_61: // %cond.load79 |
| ; CHECK-NEXT: add x9, x0, #27 |
| ; CHECK-NEXT: ld1 { v2.b }[11], [x9] |
| ; CHECK-NEXT: tbz w8, #28, .LBB0_47 |
| ; CHECK-NEXT: .LBB0_62: // %cond.load82 |
| ; CHECK-NEXT: add x9, x0, #28 |
| ; CHECK-NEXT: ld1 { v2.b }[12], [x9] |
| ; CHECK-NEXT: tbz w8, #29, .LBB0_48 |
| ; CHECK-NEXT: .LBB0_63: // %cond.load85 |
| ; CHECK-NEXT: add x9, x0, #29 |
| ; CHECK-NEXT: ld1 { v2.b }[13], [x9] |
| ; CHECK-NEXT: tbz w8, #30, .LBB0_49 |
| ; CHECK-NEXT: .LBB0_64: // %cond.load88 |
| ; CHECK-NEXT: add x9, x0, #30 |
| ; CHECK-NEXT: ld1 { v2.b }[14], [x9] |
| ; CHECK-NEXT: tbnz w8, #31, .LBB0_50 |
| ; CHECK-NEXT: b .LBB0_51 |
| %interleaved.mask = call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %mask, <16 x i1> %mask) |
| %wide.masked.vec = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr %p, i32 1, <32 x i1> %interleaved.mask, <32 x i8> poison) |
| %deinterleaved.vec = call { <16 x i8>, <16 x i8> } @llvm.vector.deinterleave2.v32i8(<32 x i8> %wide.masked.vec) |
| ret { <16 x i8>, <16 x i8> } %deinterleaved.vec |
| } |
| |
| ; foo_ld2_v8i16: masked load of <16 x i16> from %p (align 2, poison passthru) |
| ; whose mask is %mask interleaved with itself, followed by a 2-way |
| ; deinterleave of the loaded vector into two <8 x i16> results. |
| ; |
| ; The autogenerated CHECK lines record a per-lane expansion of the load: |
| ;  - the 16 mask bits are packed into w8 (addv h0 then fmov); |
| ;  - each bit is tested with tbz/tbnz, and a set bit fills one halfword lane |
| ;    via ldr h / ld1 { vN.h }[i] at byte offsets 0, 2, ..., 30 from x0 (v1 |
| ;    holds lanes 0..7, v2 holds lanes 8..15); |
| ;  - uzp1/uzp2 split v1:v2 into the even and odd elements for the return. |
| ; Both RUN lines share the CHECK prefix, so the same output is expected with |
| ; and without +sve. |
| ; NOTE(review): the "ld2" in the name suggests a structured load is the |
| ; intended end state - confirm before treating this expansion as final. |
| ; Do not hand-edit the CHECK lines; regenerate them with |
| ; utils/update_llc_test_checks.py. |
| define { <8 x i16>, <8 x i16> } @foo_ld2_v8i16(<8 x i1> %mask, ptr %p) { |
| ; CHECK-LABEL: foo_ld2_v8i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: adrp x8, .LCPI1_0 |
| ; CHECK-NEXT: zip1 v0.16b, v0.16b, v0.16b |
| ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] |
| ; CHECK-NEXT: shl v0.16b, v0.16b, #7 |
| ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 |
| ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b |
| ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; CHECK-NEXT: addv h0, v0.8h |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: tbz w8, #0, .LBB1_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ldr h1, [x0] |
| ; CHECK-NEXT: tbnz w8, #1, .LBB1_3 |
| ; CHECK-NEXT: b .LBB1_4 |
| ; CHECK-NEXT: .LBB1_2: |
| ; CHECK-NEXT: // implicit-def: $q1 |
| ; CHECK-NEXT: tbz w8, #1, .LBB1_4 |
| ; CHECK-NEXT: .LBB1_3: // %cond.load1 |
| ; CHECK-NEXT: add x9, x0, #2 |
| ; CHECK-NEXT: ld1 { v1.h }[1], [x9] |
| ; CHECK-NEXT: .LBB1_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB1_12 |
| ; CHECK-NEXT: // %bb.5: // %else5 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB1_13 |
| ; CHECK-NEXT: .LBB1_6: // %else8 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB1_14 |
| ; CHECK-NEXT: .LBB1_7: // %else11 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB1_15 |
| ; CHECK-NEXT: .LBB1_8: // %else14 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB1_16 |
| ; CHECK-NEXT: .LBB1_9: // %else17 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB1_17 |
| ; CHECK-NEXT: .LBB1_10: // %else20 |
| ; CHECK-NEXT: tbz w8, #8, .LBB1_18 |
| ; CHECK-NEXT: .LBB1_11: // %cond.load22 |
| ; CHECK-NEXT: add x9, x0, #16 |
| ; CHECK-NEXT: ld1 { v2.h }[0], [x9] |
| ; CHECK-NEXT: tbnz w8, #9, .LBB1_19 |
| ; CHECK-NEXT: b .LBB1_20 |
| ; CHECK-NEXT: .LBB1_12: // %cond.load4 |
| ; CHECK-NEXT: add x9, x0, #4 |
| ; CHECK-NEXT: ld1 { v1.h }[2], [x9] |
| ; CHECK-NEXT: tbz w8, #3, .LBB1_6 |
| ; CHECK-NEXT: .LBB1_13: // %cond.load7 |
| ; CHECK-NEXT: add x9, x0, #6 |
| ; CHECK-NEXT: ld1 { v1.h }[3], [x9] |
| ; CHECK-NEXT: tbz w8, #4, .LBB1_7 |
| ; CHECK-NEXT: .LBB1_14: // %cond.load10 |
| ; CHECK-NEXT: add x9, x0, #8 |
| ; CHECK-NEXT: ld1 { v1.h }[4], [x9] |
| ; CHECK-NEXT: tbz w8, #5, .LBB1_8 |
| ; CHECK-NEXT: .LBB1_15: // %cond.load13 |
| ; CHECK-NEXT: add x9, x0, #10 |
| ; CHECK-NEXT: ld1 { v1.h }[5], [x9] |
| ; CHECK-NEXT: tbz w8, #6, .LBB1_9 |
| ; CHECK-NEXT: .LBB1_16: // %cond.load16 |
| ; CHECK-NEXT: add x9, x0, #12 |
| ; CHECK-NEXT: ld1 { v1.h }[6], [x9] |
| ; CHECK-NEXT: tbz w8, #7, .LBB1_10 |
| ; CHECK-NEXT: .LBB1_17: // %cond.load19 |
| ; CHECK-NEXT: add x9, x0, #14 |
| ; CHECK-NEXT: ld1 { v1.h }[7], [x9] |
| ; CHECK-NEXT: tbnz w8, #8, .LBB1_11 |
| ; CHECK-NEXT: .LBB1_18: |
| ; CHECK-NEXT: // implicit-def: $q2 |
| ; CHECK-NEXT: tbz w8, #9, .LBB1_20 |
| ; CHECK-NEXT: .LBB1_19: // %cond.load25 |
| ; CHECK-NEXT: add x9, x0, #18 |
| ; CHECK-NEXT: ld1 { v2.h }[1], [x9] |
| ; CHECK-NEXT: .LBB1_20: // %else26 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB1_28 |
| ; CHECK-NEXT: // %bb.21: // %else29 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB1_29 |
| ; CHECK-NEXT: .LBB1_22: // %else32 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB1_30 |
| ; CHECK-NEXT: .LBB1_23: // %else35 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB1_31 |
| ; CHECK-NEXT: .LBB1_24: // %else38 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB1_32 |
| ; CHECK-NEXT: .LBB1_25: // %else41 |
| ; CHECK-NEXT: tbz w8, #15, .LBB1_27 |
| ; CHECK-NEXT: .LBB1_26: // %cond.load43 |
| ; CHECK-NEXT: add x8, x0, #30 |
| ; CHECK-NEXT: ld1 { v2.h }[7], [x8] |
| ; CHECK-NEXT: .LBB1_27: // %else44 |
| ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h |
| ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB1_28: // %cond.load28 |
| ; CHECK-NEXT: add x9, x0, #20 |
| ; CHECK-NEXT: ld1 { v2.h }[2], [x9] |
| ; CHECK-NEXT: tbz w8, #11, .LBB1_22 |
| ; CHECK-NEXT: .LBB1_29: // %cond.load31 |
| ; CHECK-NEXT: add x9, x0, #22 |
| ; CHECK-NEXT: ld1 { v2.h }[3], [x9] |
| ; CHECK-NEXT: tbz w8, #12, .LBB1_23 |
| ; CHECK-NEXT: .LBB1_30: // %cond.load34 |
| ; CHECK-NEXT: add x9, x0, #24 |
| ; CHECK-NEXT: ld1 { v2.h }[4], [x9] |
| ; CHECK-NEXT: tbz w8, #13, .LBB1_24 |
| ; CHECK-NEXT: .LBB1_31: // %cond.load37 |
| ; CHECK-NEXT: add x9, x0, #26 |
| ; CHECK-NEXT: ld1 { v2.h }[5], [x9] |
| ; CHECK-NEXT: tbz w8, #14, .LBB1_25 |
| ; CHECK-NEXT: .LBB1_32: // %cond.load40 |
| ; CHECK-NEXT: add x9, x0, #28 |
| ; CHECK-NEXT: ld1 { v2.h }[6], [x9] |
| ; CHECK-NEXT: tbnz w8, #15, .LBB1_26 |
| ; CHECK-NEXT: b .LBB1_27 |
| %interleaved.mask = call <16 x i1> @llvm.vector.interleave2.v16i1(<8 x i1> %mask, <8 x i1> %mask) |
| %wide.masked.vec = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr %p, i32 2, <16 x i1> %interleaved.mask, <16 x i16> poison) |
| %deinterleaved.vec = call { <8 x i16>, <8 x i16> } @llvm.vector.deinterleave2.v16i16(<16 x i16> %wide.masked.vec) |
| ret { <8 x i16>, <8 x i16> } %deinterleaved.vec |
| } |
| |
| ; foo_ld2_v4f32: masked load of <8 x float> from %p (align 4, poison passthru) |
| ; whose mask is %mask interleaved with itself, followed by a 2-way |
| ; deinterleave of the loaded vector into two <4 x float> results. |
| ; |
| ; The autogenerated CHECK lines record a per-lane expansion of the load: |
| ;  - the 8 mask bits are packed into w8 (addv b0 then fmov); |
| ;  - each bit is tested with tbz/tbnz, and a set bit fills one word lane via |
| ;    ldr s / ld1 { vN.s }[i] at byte offsets 0, 4, ..., 28 from x0 (v1 holds |
| ;    lanes 0..3, v2 holds lanes 4..7); |
| ;  - uzp1/uzp2 split v1:v2 into the even and odd elements for the return. |
| ; Both RUN lines share the CHECK prefix, so the same output is expected with |
| ; and without +sve. |
| ; The deinterleave2 intrinsic is mangled on its wide operand type, so the |
| ; suffix is v8f32 to match the <8 x float> argument. |
| ; NOTE(review): the "ld2" in the name suggests a structured load is the |
| ; intended end state - confirm before treating this expansion as final. |
| ; Do not hand-edit the CHECK lines; regenerate them with |
| ; utils/update_llc_test_checks.py. |
| define { <4 x float>, <4 x float> } @foo_ld2_v4f32(<4 x i1> %mask, ptr %p) { |
| ; CHECK-LABEL: foo_ld2_v4f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b |
| ; CHECK-NEXT: adrp x8, .LCPI2_0 |
| ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI2_0] |
| ; CHECK-NEXT: zip1 v0.8b, v0.8b, v0.8b |
| ; CHECK-NEXT: shl v0.8b, v0.8b, #7 |
| ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 |
| ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b |
| ; CHECK-NEXT: addv b0, v0.8b |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: tbz w8, #0, .LBB2_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ldr s1, [x0] |
| ; CHECK-NEXT: tbnz w8, #1, .LBB2_3 |
| ; CHECK-NEXT: b .LBB2_4 |
| ; CHECK-NEXT: .LBB2_2: |
| ; CHECK-NEXT: // implicit-def: $q1 |
| ; CHECK-NEXT: tbz w8, #1, .LBB2_4 |
| ; CHECK-NEXT: .LBB2_3: // %cond.load1 |
| ; CHECK-NEXT: add x9, x0, #4 |
| ; CHECK-NEXT: ld1 { v1.s }[1], [x9] |
| ; CHECK-NEXT: .LBB2_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB2_8 |
| ; CHECK-NEXT: // %bb.5: // %else5 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB2_9 |
| ; CHECK-NEXT: .LBB2_6: // %else8 |
| ; CHECK-NEXT: tbz w8, #4, .LBB2_10 |
| ; CHECK-NEXT: .LBB2_7: // %cond.load10 |
| ; CHECK-NEXT: add x9, x0, #16 |
| ; CHECK-NEXT: ld1 { v2.s }[0], [x9] |
| ; CHECK-NEXT: tbnz w8, #5, .LBB2_11 |
| ; CHECK-NEXT: b .LBB2_12 |
| ; CHECK-NEXT: .LBB2_8: // %cond.load4 |
| ; CHECK-NEXT: add x9, x0, #8 |
| ; CHECK-NEXT: ld1 { v1.s }[2], [x9] |
| ; CHECK-NEXT: tbz w8, #3, .LBB2_6 |
| ; CHECK-NEXT: .LBB2_9: // %cond.load7 |
| ; CHECK-NEXT: add x9, x0, #12 |
| ; CHECK-NEXT: ld1 { v1.s }[3], [x9] |
| ; CHECK-NEXT: tbnz w8, #4, .LBB2_7 |
| ; CHECK-NEXT: .LBB2_10: |
| ; CHECK-NEXT: // implicit-def: $q2 |
| ; CHECK-NEXT: tbz w8, #5, .LBB2_12 |
| ; CHECK-NEXT: .LBB2_11: // %cond.load13 |
| ; CHECK-NEXT: add x9, x0, #20 |
| ; CHECK-NEXT: ld1 { v2.s }[1], [x9] |
| ; CHECK-NEXT: .LBB2_12: // %else14 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB2_16 |
| ; CHECK-NEXT: // %bb.13: // %else17 |
| ; CHECK-NEXT: tbz w8, #7, .LBB2_15 |
| ; CHECK-NEXT: .LBB2_14: // %cond.load19 |
| ; CHECK-NEXT: add x8, x0, #28 |
| ; CHECK-NEXT: ld1 { v2.s }[3], [x8] |
| ; CHECK-NEXT: .LBB2_15: // %else20 |
| ; CHECK-NEXT: uzp1 v0.4s, v1.4s, v2.4s |
| ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB2_16: // %cond.load16 |
| ; CHECK-NEXT: add x9, x0, #24 |
| ; CHECK-NEXT: ld1 { v2.s }[2], [x9] |
| ; CHECK-NEXT: tbnz w8, #7, .LBB2_14 |
| ; CHECK-NEXT: b .LBB2_15 |
| %interleaved.mask = call <8 x i1> @llvm.vector.interleave2.v8i1(<4 x i1> %mask, <4 x i1> %mask) |
| %wide.masked.vec = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %p, i32 4, <8 x i1> %interleaved.mask, <8 x float> poison) |
| %deinterleaved.vec = call { <4 x float>, <4 x float> } @llvm.vector.deinterleave2.v8f32(<8 x float> %wide.masked.vec) |
| ret { <4 x float>, <4 x float> } %deinterleaved.vec |
| } |
| |
| ; foo_ld2_v2f64: masked load of <4 x double> from %p (align 8, poison |
| ; passthru) whose mask is %mask interleaved with itself, followed by a 2-way |
| ; deinterleave of the loaded vector into two <2 x double> results. |
| ; |
| ; The autogenerated CHECK lines record a per-lane expansion of the load: |
| ;  - the mask bits are packed into w8 (addv h0 then fmov); bits 0..3 are used; |
| ;  - each bit is tested with tbz/tbnz, and a set bit fills one doubleword lane |
| ;    via ldr d / ld1 { vN.d }[i] at byte offsets 0, 8, 16, 24 from x0 (v1 |
| ;    holds lanes 0..1, v2 holds lanes 2..3); |
| ;  - zip1/zip2 on .2d pick lane 0 and lane 1 of v1 and v2 respectively, which |
| ;    for two-element vectors is the even/odd split needed for the return. |
| ; Both RUN lines share the CHECK prefix, so the same output is expected with |
| ; and without +sve. |
| ; NOTE(review): the "ld2" in the name suggests a structured load is the |
| ; intended end state - confirm before treating this expansion as final. |
| ; Do not hand-edit the CHECK lines; regenerate them with |
| ; utils/update_llc_test_checks.py. |
| define { <2 x double>, <2 x double> } @foo_ld2_v2f64(<2 x i1> %mask, ptr %p) { |
| ; CHECK-LABEL: foo_ld2_v2f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h |
| ; CHECK-NEXT: adrp x8, .LCPI3_0 |
| ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0] |
| ; CHECK-NEXT: zip1 v0.4h, v0.4h, v0.4h |
| ; CHECK-NEXT: shl v0.4h, v0.4h, #15 |
| ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 |
| ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b |
| ; CHECK-NEXT: addv h0, v0.4h |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: tbz w8, #0, .LBB3_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ldr d1, [x0] |
| ; CHECK-NEXT: tbnz w8, #1, .LBB3_3 |
| ; CHECK-NEXT: b .LBB3_4 |
| ; CHECK-NEXT: .LBB3_2: |
| ; CHECK-NEXT: // implicit-def: $q1 |
| ; CHECK-NEXT: tbz w8, #1, .LBB3_4 |
| ; CHECK-NEXT: .LBB3_3: // %cond.load1 |
| ; CHECK-NEXT: add x9, x0, #8 |
| ; CHECK-NEXT: ld1 { v1.d }[1], [x9] |
| ; CHECK-NEXT: .LBB3_4: // %else2 |
| ; CHECK-NEXT: tbz w8, #2, .LBB3_6 |
| ; CHECK-NEXT: // %bb.5: // %cond.load4 |
| ; CHECK-NEXT: add x9, x0, #16 |
| ; CHECK-NEXT: ld1 { v2.d }[0], [x9] |
| ; CHECK-NEXT: tbnz w8, #3, .LBB3_7 |
| ; CHECK-NEXT: b .LBB3_8 |
| ; CHECK-NEXT: .LBB3_6: |
| ; CHECK-NEXT: // implicit-def: $q2 |
| ; CHECK-NEXT: tbz w8, #3, .LBB3_8 |
| ; CHECK-NEXT: .LBB3_7: // %cond.load7 |
| ; CHECK-NEXT: add x8, x0, #24 |
| ; CHECK-NEXT: ld1 { v2.d }[1], [x8] |
| ; CHECK-NEXT: .LBB3_8: // %else8 |
| ; CHECK-NEXT: zip1 v0.2d, v1.2d, v2.2d |
| ; CHECK-NEXT: zip2 v1.2d, v1.2d, v2.2d |
| ; CHECK-NEXT: ret |
| %interleaved.mask = call <4 x i1> @llvm.vector.interleave2.v4i1(<2 x i1> %mask, <2 x i1> %mask) |
| %wide.masked.vec = call <4 x double> @llvm.masked.load.v4f64.p0(ptr %p, i32 8, <4 x i1> %interleaved.mask, <4 x double> poison) |
| %deinterleaved.vec = call { <2 x double>, <2 x double> } @llvm.vector.deinterleave2.v4f64(<4 x double> %wide.masked.vec) |
| ret { <2 x double>, <2 x double> } %deinterleaved.vec |
| } |
| |