| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+sve \ |
| ; RUN: -aarch64-sve-vector-bits-min=256 | FileCheck --check-prefixes=CHECK-i32 %s |
| ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+sve \ |
| ; RUN: -aarch64-sve-vector-bits-min=256 | FileCheck --check-prefixes=CHECK-i64 %s |
| |
| ; lrint on a one-element half vector. iXLen is rewritten to i32 or i64 by the |
| ; sed step in the RUN lines, so each run checks its own result width: a scalar |
| ; frintx followed by fcvtzs into a w register (i32 run) or an x register |
| ; (i64 run). The expected output below is generated by |
| ; utils/update_llc_test_checks.py and should be regenerated, not hand-edited. |
| define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { |
| ; CHECK-i32-LABEL: lrint_v1f16: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx h0, h0 |
| ; CHECK-i32-NEXT: fcvtzs w8, h0 |
| ; CHECK-i32-NEXT: fmov s0, w8 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v1f16: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: frintx h0, h0 |
| ; CHECK-i64-NEXT: fcvtzs x8, h0 |
| ; CHECK-i64-NEXT: fmov d0, x8 |
| ; CHECK-i64-NEXT: ret |
| %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x) |
| ret <1 x iXLen> %a |
| } |
| ; Intrinsic declaration; the iXLen in the mangled name is substituted together |
| ; with the types. |
| declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>) |
| |
| ; lrint on a two-element half vector. In both runs the expected code pulls |
| ; lane 1 out with "mov h1, v0.h[1]", rounds and converts each lane with scalar |
| ; frintx/fcvtzs, and rebuilds the result with fmov plus a lane insert. The |
| ; runs differ only in the integer register width (w/s lanes vs. x/d lanes). |
| define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { |
| ; CHECK-i32-LABEL: lrint_v2f16: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-i32-NEXT: mov h1, v0.h[1] |
| ; CHECK-i32-NEXT: frintx h0, h0 |
| ; CHECK-i32-NEXT: frintx h1, h1 |
| ; CHECK-i32-NEXT: fcvtzs w8, h0 |
| ; CHECK-i32-NEXT: fcvtzs w9, h1 |
| ; CHECK-i32-NEXT: fmov s0, w8 |
| ; CHECK-i32-NEXT: mov v0.s[1], w9 |
| ; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v2f16: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-i64-NEXT: mov h1, v0.h[1] |
| ; CHECK-i64-NEXT: frintx h0, h0 |
| ; CHECK-i64-NEXT: frintx h1, h1 |
| ; CHECK-i64-NEXT: fcvtzs x8, h0 |
| ; CHECK-i64-NEXT: fcvtzs x9, h1 |
| ; CHECK-i64-NEXT: fmov d0, x8 |
| ; CHECK-i64-NEXT: mov v0.d[1], x9 |
| ; CHECK-i64-NEXT: ret |
| %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x) |
| ret <2 x iXLen> %a |
| } |
| ; Intrinsic declaration for the two-element half case. |
| declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>) |
| |
| ; lrint on a four-element half vector. The i32 run expects a fully vector |
| ; sequence (frintx on .4h, fcvtl to .4s, vector fcvtzs). The i64 run expects a |
| ; vector frintx followed by per-lane scalar fcvtzs, with the converted lanes |
| ; assembled two at a time into v0 and v1. |
| define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { |
| ; CHECK-i32-LABEL: lrint_v4f16: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx v0.4h, v0.4h |
| ; CHECK-i32-NEXT: fcvtl v0.4s, v0.4h |
| ; CHECK-i32-NEXT: fcvtzs v0.4s, v0.4s |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v4f16: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: frintx v0.4h, v0.4h |
| ; CHECK-i64-NEXT: mov h1, v0.h[2] |
| ; CHECK-i64-NEXT: mov h2, v0.h[3] |
| ; CHECK-i64-NEXT: mov h3, v0.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x9, h0 |
| ; CHECK-i64-NEXT: fcvtzs x8, h1 |
| ; CHECK-i64-NEXT: fcvtzs x10, h2 |
| ; CHECK-i64-NEXT: fcvtzs x11, h3 |
| ; CHECK-i64-NEXT: fmov d0, x9 |
| ; CHECK-i64-NEXT: fmov d1, x8 |
| ; CHECK-i64-NEXT: mov v0.d[1], x11 |
| ; CHECK-i64-NEXT: mov v1.d[1], x10 |
| ; CHECK-i64-NEXT: ret |
| %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x) |
| ret <4 x iXLen> %a |
| } |
| ; Intrinsic declaration for the four-element half case. |
| declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>) |
| |
| ; lrint on an eight-element half vector. The i32 run expects one vector frintx |
| ; on .8h followed by per-lane scalar fcvtzs, with the results assembled into |
| ; v0 and v1. The i64 run first splits the input into halves with ext, rounds |
| ; each half with frintx on .4h, converts every lane with scalar fcvtzs, and |
| ; assembles the results into v0-v3. |
| define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { |
| ; CHECK-i32-LABEL: lrint_v8f16: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx v2.8h, v0.8h |
| ; CHECK-i32-NEXT: mov h0, v2.h[4] |
| ; CHECK-i32-NEXT: mov h1, v2.h[5] |
| ; CHECK-i32-NEXT: mov h3, v2.h[1] |
| ; CHECK-i32-NEXT: fcvtzs w9, h2 |
| ; CHECK-i32-NEXT: mov h4, v2.h[6] |
| ; CHECK-i32-NEXT: fcvtzs w8, h0 |
| ; CHECK-i32-NEXT: mov h0, v2.h[2] |
| ; CHECK-i32-NEXT: fcvtzs w10, h1 |
| ; CHECK-i32-NEXT: fcvtzs w11, h3 |
| ; CHECK-i32-NEXT: mov h3, v2.h[7] |
| ; CHECK-i32-NEXT: fcvtzs w12, h4 |
| ; CHECK-i32-NEXT: mov h2, v2.h[3] |
| ; CHECK-i32-NEXT: fcvtzs w13, h0 |
| ; CHECK-i32-NEXT: fmov s0, w9 |
| ; CHECK-i32-NEXT: fmov s1, w8 |
| ; CHECK-i32-NEXT: fcvtzs w8, h3 |
| ; CHECK-i32-NEXT: fcvtzs w9, h2 |
| ; CHECK-i32-NEXT: mov v0.s[1], w11 |
| ; CHECK-i32-NEXT: mov v1.s[1], w10 |
| ; CHECK-i32-NEXT: mov v0.s[2], w13 |
| ; CHECK-i32-NEXT: mov v1.s[2], w12 |
| ; CHECK-i32-NEXT: mov v0.s[3], w9 |
| ; CHECK-i32-NEXT: mov v1.s[3], w8 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v8f16: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-i64-NEXT: frintx v0.4h, v0.4h |
| ; CHECK-i64-NEXT: frintx v1.4h, v1.4h |
| ; CHECK-i64-NEXT: mov h4, v0.h[2] |
| ; CHECK-i64-NEXT: mov h2, v0.h[1] |
| ; CHECK-i64-NEXT: mov h7, v0.h[3] |
| ; CHECK-i64-NEXT: fcvtzs x8, h0 |
| ; CHECK-i64-NEXT: mov h3, v1.h[2] |
| ; CHECK-i64-NEXT: mov h5, v1.h[3] |
| ; CHECK-i64-NEXT: mov h6, v1.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x11, h1 |
| ; CHECK-i64-NEXT: fcvtzs x12, h4 |
| ; CHECK-i64-NEXT: fcvtzs x9, h2 |
| ; CHECK-i64-NEXT: fcvtzs x15, h7 |
| ; CHECK-i64-NEXT: fmov d0, x8 |
| ; CHECK-i64-NEXT: fcvtzs x10, h3 |
| ; CHECK-i64-NEXT: fcvtzs x13, h5 |
| ; CHECK-i64-NEXT: fcvtzs x14, h6 |
| ; CHECK-i64-NEXT: fmov d1, x12 |
| ; CHECK-i64-NEXT: fmov d2, x11 |
| ; CHECK-i64-NEXT: mov v0.d[1], x9 |
| ; CHECK-i64-NEXT: fmov d3, x10 |
| ; CHECK-i64-NEXT: mov v1.d[1], x15 |
| ; CHECK-i64-NEXT: mov v2.d[1], x14 |
| ; CHECK-i64-NEXT: mov v3.d[1], x13 |
| ; CHECK-i64-NEXT: ret |
| %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x) |
| ret <8 x iXLen> %a |
| } |
| ; Intrinsic declaration for the eight-element half case. |
| declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) |
| |
| ; lrint on a sixteen-element half vector, which arrives in v0 and v1. The i32 |
| ; run expects a vector frintx on each .8h input, per-lane scalar fcvtzs, and |
| ; the results assembled into v0-v3. The i64 run splits each input with ext, |
| ; rounds the four .4h pieces with frintx, converts every lane with scalar |
| ; fcvtzs, and assembles the results into v0-v7. |
| define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { |
| ; CHECK-i32-LABEL: lrint_v16f16: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx v1.8h, v1.8h |
| ; CHECK-i32-NEXT: frintx v0.8h, v0.8h |
| ; CHECK-i32-NEXT: mov h3, v1.h[4] |
| ; CHECK-i32-NEXT: mov h2, v1.h[5] |
| ; CHECK-i32-NEXT: mov h5, v0.h[4] |
| ; CHECK-i32-NEXT: mov h4, v1.h[1] |
| ; CHECK-i32-NEXT: mov h6, v0.h[1] |
| ; CHECK-i32-NEXT: fcvtzs w11, h0 |
| ; CHECK-i32-NEXT: fcvtzs w14, h1 |
| ; CHECK-i32-NEXT: mov h7, v1.h[6] |
| ; CHECK-i32-NEXT: mov h16, v1.h[3] |
| ; CHECK-i32-NEXT: mov h17, v0.h[7] |
| ; CHECK-i32-NEXT: mov h18, v0.h[3] |
| ; CHECK-i32-NEXT: fcvtzs w9, h3 |
| ; CHECK-i32-NEXT: mov h3, v0.h[5] |
| ; CHECK-i32-NEXT: fcvtzs w8, h2 |
| ; CHECK-i32-NEXT: mov h2, v1.h[2] |
| ; CHECK-i32-NEXT: fcvtzs w12, h5 |
| ; CHECK-i32-NEXT: fcvtzs w10, h4 |
| ; CHECK-i32-NEXT: mov h4, v0.h[6] |
| ; CHECK-i32-NEXT: mov h5, v0.h[2] |
| ; CHECK-i32-NEXT: fcvtzs w13, h6 |
| ; CHECK-i32-NEXT: mov h6, v1.h[7] |
| ; CHECK-i32-NEXT: fmov s0, w11 |
| ; CHECK-i32-NEXT: fcvtzs w16, h7 |
| ; CHECK-i32-NEXT: fcvtzs w15, h3 |
| ; CHECK-i32-NEXT: fmov s3, w9 |
| ; CHECK-i32-NEXT: fcvtzs w9, h16 |
| ; CHECK-i32-NEXT: fcvtzs w17, h2 |
| ; CHECK-i32-NEXT: fmov s1, w12 |
| ; CHECK-i32-NEXT: fmov s2, w14 |
| ; CHECK-i32-NEXT: fcvtzs w11, h4 |
| ; CHECK-i32-NEXT: fcvtzs w18, h5 |
| ; CHECK-i32-NEXT: mov v0.s[1], w13 |
| ; CHECK-i32-NEXT: mov v3.s[1], w8 |
| ; CHECK-i32-NEXT: fcvtzs w8, h6 |
| ; CHECK-i32-NEXT: fcvtzs w12, h18 |
| ; CHECK-i32-NEXT: mov v1.s[1], w15 |
| ; CHECK-i32-NEXT: mov v2.s[1], w10 |
| ; CHECK-i32-NEXT: fcvtzs w10, h17 |
| ; CHECK-i32-NEXT: mov v0.s[2], w18 |
| ; CHECK-i32-NEXT: mov v3.s[2], w16 |
| ; CHECK-i32-NEXT: mov v1.s[2], w11 |
| ; CHECK-i32-NEXT: mov v2.s[2], w17 |
| ; CHECK-i32-NEXT: mov v0.s[3], w12 |
| ; CHECK-i32-NEXT: mov v3.s[3], w8 |
| ; CHECK-i32-NEXT: mov v1.s[3], w10 |
| ; CHECK-i32-NEXT: mov v2.s[3], w9 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v16f16: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-i64-NEXT: frintx v1.4h, v1.4h |
| ; CHECK-i64-NEXT: frintx v3.4h, v0.4h |
| ; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8 |
| ; CHECK-i64-NEXT: frintx v2.4h, v2.4h |
| ; CHECK-i64-NEXT: mov h4, v1.h[2] |
| ; CHECK-i64-NEXT: mov h5, v3.h[2] |
| ; CHECK-i64-NEXT: frintx v0.4h, v0.4h |
| ; CHECK-i64-NEXT: mov h6, v3.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x9, h3 |
| ; CHECK-i64-NEXT: mov h16, v1.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x12, h1 |
| ; CHECK-i64-NEXT: mov h3, v3.h[3] |
| ; CHECK-i64-NEXT: mov h17, v1.h[3] |
| ; CHECK-i64-NEXT: mov h7, v2.h[3] |
| ; CHECK-i64-NEXT: fcvtzs x8, h4 |
| ; CHECK-i64-NEXT: fcvtzs x10, h5 |
| ; CHECK-i64-NEXT: mov h4, v2.h[2] |
| ; CHECK-i64-NEXT: mov h5, v0.h[2] |
| ; CHECK-i64-NEXT: fcvtzs x11, h6 |
| ; CHECK-i64-NEXT: mov h6, v0.h[3] |
| ; CHECK-i64-NEXT: fcvtzs x15, h2 |
| ; CHECK-i64-NEXT: mov h2, v2.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x14, h0 |
| ; CHECK-i64-NEXT: fcvtzs x17, h3 |
| ; CHECK-i64-NEXT: fcvtzs x0, h17 |
| ; CHECK-i64-NEXT: fcvtzs x13, h7 |
| ; CHECK-i64-NEXT: mov h7, v0.h[1] |
| ; CHECK-i64-NEXT: fmov d0, x9 |
| ; CHECK-i64-NEXT: fcvtzs x16, h4 |
| ; CHECK-i64-NEXT: fcvtzs x9, h5 |
| ; CHECK-i64-NEXT: fmov d4, x12 |
| ; CHECK-i64-NEXT: fcvtzs x12, h16 |
| ; CHECK-i64-NEXT: fmov d1, x10 |
| ; CHECK-i64-NEXT: fcvtzs x10, h6 |
| ; CHECK-i64-NEXT: fmov d5, x8 |
| ; CHECK-i64-NEXT: fcvtzs x8, h2 |
| ; CHECK-i64-NEXT: fmov d2, x14 |
| ; CHECK-i64-NEXT: fcvtzs x18, h7 |
| ; CHECK-i64-NEXT: fmov d6, x15 |
| ; CHECK-i64-NEXT: mov v0.d[1], x11 |
| ; CHECK-i64-NEXT: fmov d3, x9 |
| ; CHECK-i64-NEXT: fmov d7, x16 |
| ; CHECK-i64-NEXT: mov v1.d[1], x17 |
| ; CHECK-i64-NEXT: mov v4.d[1], x12 |
| ; CHECK-i64-NEXT: mov v5.d[1], x0 |
| ; CHECK-i64-NEXT: mov v6.d[1], x8 |
| ; CHECK-i64-NEXT: mov v2.d[1], x18 |
| ; CHECK-i64-NEXT: mov v3.d[1], x10 |
| ; CHECK-i64-NEXT: mov v7.d[1], x13 |
| ; CHECK-i64-NEXT: ret |
| %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x) |
| ret <16 x iXLen> %a |
| } |
| ; Intrinsic declaration for the sixteen-element half case. |
| declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) |
| |
| ; lrint on a thirty-two-element half vector, which arrives in v0-v3. |
| ; |
| ; i32 run: the per-lane scalar conversions use w19-w26 in addition to the |
| ; lower-numbered registers, so the expected code spills x19-x26 with stp, |
| ; records their CFI offsets, and reloads them with ldp before returning. The |
| ; results are assembled into v0-v7. |
| ; |
| ; i64 run: the expected code sets up an x29 frame and aligns sp down to 32 |
| ; bytes, stores the converted lanes to the stack with stp, reloads them as |
| ; four-element SVE vectors (ld1d under "ptrue p0.d, vl4"), and writes them out |
| ; with st1d relative to x8. |
| ; NOTE(review): x8 is presumably the indirect-result pointer for the by-value |
| ; vector return; that is not stated anywhere in this file - confirm. |
| define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { |
| ; CHECK-i32-LABEL: lrint_v32f16: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill |
| ; CHECK-i32-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-i32-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-i32-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-i32-NEXT: .cfi_def_cfa_offset 64 |
| ; CHECK-i32-NEXT: .cfi_offset w19, -8 |
| ; CHECK-i32-NEXT: .cfi_offset w20, -16 |
| ; CHECK-i32-NEXT: .cfi_offset w21, -24 |
| ; CHECK-i32-NEXT: .cfi_offset w22, -32 |
| ; CHECK-i32-NEXT: .cfi_offset w23, -40 |
| ; CHECK-i32-NEXT: .cfi_offset w24, -48 |
| ; CHECK-i32-NEXT: .cfi_offset w25, -56 |
| ; CHECK-i32-NEXT: .cfi_offset w26, -64 |
| ; CHECK-i32-NEXT: frintx v3.8h, v3.8h |
| ; CHECK-i32-NEXT: frintx v2.8h, v2.8h |
| ; CHECK-i32-NEXT: frintx v1.8h, v1.8h |
| ; CHECK-i32-NEXT: frintx v0.8h, v0.8h |
| ; CHECK-i32-NEXT: mov h4, v3.h[7] |
| ; CHECK-i32-NEXT: mov h5, v3.h[6] |
| ; CHECK-i32-NEXT: mov h6, v3.h[5] |
| ; CHECK-i32-NEXT: mov h7, v3.h[4] |
| ; CHECK-i32-NEXT: mov h16, v3.h[3] |
| ; CHECK-i32-NEXT: mov h17, v3.h[2] |
| ; CHECK-i32-NEXT: mov h18, v3.h[1] |
| ; CHECK-i32-NEXT: mov h19, v2.h[7] |
| ; CHECK-i32-NEXT: fcvtzs w1, h3 |
| ; CHECK-i32-NEXT: mov h3, v1.h[6] |
| ; CHECK-i32-NEXT: fcvtzs w7, h2 |
| ; CHECK-i32-NEXT: fcvtzs w22, h0 |
| ; CHECK-i32-NEXT: fcvtzs w8, h4 |
| ; CHECK-i32-NEXT: mov h4, v2.h[6] |
| ; CHECK-i32-NEXT: fcvtzs w10, h5 |
| ; CHECK-i32-NEXT: mov h5, v2.h[5] |
| ; CHECK-i32-NEXT: fcvtzs w12, h6 |
| ; CHECK-i32-NEXT: mov h6, v2.h[4] |
| ; CHECK-i32-NEXT: fcvtzs w13, h7 |
| ; CHECK-i32-NEXT: mov h7, v2.h[3] |
| ; CHECK-i32-NEXT: fcvtzs w9, h16 |
| ; CHECK-i32-NEXT: fcvtzs w11, h17 |
| ; CHECK-i32-NEXT: mov h16, v2.h[2] |
| ; CHECK-i32-NEXT: mov h17, v2.h[1] |
| ; CHECK-i32-NEXT: fcvtzs w17, h4 |
| ; CHECK-i32-NEXT: mov h4, v1.h[5] |
| ; CHECK-i32-NEXT: mov h2, v0.h[5] |
| ; CHECK-i32-NEXT: fcvtzs w0, h5 |
| ; CHECK-i32-NEXT: fcvtzs w3, h6 |
| ; CHECK-i32-NEXT: mov h5, v1.h[4] |
| ; CHECK-i32-NEXT: mov h6, v0.h[4] |
| ; CHECK-i32-NEXT: fcvtzs w16, h7 |
| ; CHECK-i32-NEXT: mov h7, v0.h[1] |
| ; CHECK-i32-NEXT: fcvtzs w15, h18 |
| ; CHECK-i32-NEXT: fcvtzs w2, h3 |
| ; CHECK-i32-NEXT: mov h3, v1.h[2] |
| ; CHECK-i32-NEXT: fcvtzs w19, h4 |
| ; CHECK-i32-NEXT: mov h4, v1.h[1] |
| ; CHECK-i32-NEXT: mov h18, v0.h[6] |
| ; CHECK-i32-NEXT: fcvtzs w20, h5 |
| ; CHECK-i32-NEXT: fcvtzs w23, h2 |
| ; CHECK-i32-NEXT: mov h2, v0.h[2] |
| ; CHECK-i32-NEXT: fcvtzs w21, h6 |
| ; CHECK-i32-NEXT: fcvtzs w25, h1 |
| ; CHECK-i32-NEXT: fcvtzs w4, h17 |
| ; CHECK-i32-NEXT: fcvtzs w24, h7 |
| ; CHECK-i32-NEXT: fcvtzs w14, h19 |
| ; CHECK-i32-NEXT: fcvtzs w18, h16 |
| ; CHECK-i32-NEXT: fcvtzs w26, h4 |
| ; CHECK-i32-NEXT: mov h16, v1.h[7] |
| ; CHECK-i32-NEXT: mov h17, v1.h[3] |
| ; CHECK-i32-NEXT: fcvtzs w5, h3 |
| ; CHECK-i32-NEXT: mov h19, v0.h[7] |
| ; CHECK-i32-NEXT: fcvtzs w6, h18 |
| ; CHECK-i32-NEXT: mov h18, v0.h[3] |
| ; CHECK-i32-NEXT: fmov s0, w22 |
| ; CHECK-i32-NEXT: fmov s1, w21 |
| ; CHECK-i32-NEXT: fcvtzs w21, h2 |
| ; CHECK-i32-NEXT: fmov s2, w25 |
| ; CHECK-i32-NEXT: fmov s3, w20 |
| ; CHECK-i32-NEXT: fmov s4, w7 |
| ; CHECK-i32-NEXT: fmov s5, w3 |
| ; CHECK-i32-NEXT: fmov s6, w1 |
| ; CHECK-i32-NEXT: fmov s7, w13 |
| ; CHECK-i32-NEXT: mov v0.s[1], w24 |
| ; CHECK-i32-NEXT: mov v1.s[1], w23 |
| ; CHECK-i32-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-i32-NEXT: mov v2.s[1], w26 |
| ; CHECK-i32-NEXT: mov v3.s[1], w19 |
| ; CHECK-i32-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-i32-NEXT: mov v4.s[1], w4 |
| ; CHECK-i32-NEXT: mov v5.s[1], w0 |
| ; CHECK-i32-NEXT: mov v6.s[1], w15 |
| ; CHECK-i32-NEXT: mov v7.s[1], w12 |
| ; CHECK-i32-NEXT: fcvtzs w12, h16 |
| ; CHECK-i32-NEXT: fcvtzs w13, h17 |
| ; CHECK-i32-NEXT: fcvtzs w15, h19 |
| ; CHECK-i32-NEXT: fcvtzs w0, h18 |
| ; CHECK-i32-NEXT: mov v0.s[2], w21 |
| ; CHECK-i32-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-i32-NEXT: mov v1.s[2], w6 |
| ; CHECK-i32-NEXT: mov v2.s[2], w5 |
| ; CHECK-i32-NEXT: mov v3.s[2], w2 |
| ; CHECK-i32-NEXT: mov v4.s[2], w18 |
| ; CHECK-i32-NEXT: mov v5.s[2], w17 |
| ; CHECK-i32-NEXT: mov v6.s[2], w11 |
| ; CHECK-i32-NEXT: mov v7.s[2], w10 |
| ; CHECK-i32-NEXT: mov v0.s[3], w0 |
| ; CHECK-i32-NEXT: mov v1.s[3], w15 |
| ; CHECK-i32-NEXT: mov v2.s[3], w13 |
| ; CHECK-i32-NEXT: mov v3.s[3], w12 |
| ; CHECK-i32-NEXT: mov v4.s[3], w16 |
| ; CHECK-i32-NEXT: mov v5.s[3], w14 |
| ; CHECK-i32-NEXT: mov v6.s[3], w9 |
| ; CHECK-i32-NEXT: mov v7.s[3], w8 |
| ; CHECK-i32-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v32f16: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-i64-NEXT: sub x9, sp, #272 |
| ; CHECK-i64-NEXT: mov x29, sp |
| ; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 |
| ; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-i64-NEXT: .cfi_offset w30, -8 |
| ; CHECK-i64-NEXT: .cfi_offset w29, -16 |
| ; CHECK-i64-NEXT: frintx v5.4h, v0.4h |
| ; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8 |
| ; CHECK-i64-NEXT: ext v4.16b, v1.16b, v1.16b, #8 |
| ; CHECK-i64-NEXT: ext v17.16b, v2.16b, v2.16b, #8 |
| ; CHECK-i64-NEXT: frintx v1.4h, v1.4h |
| ; CHECK-i64-NEXT: frintx v2.4h, v2.4h |
| ; CHECK-i64-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i64-NEXT: mov h6, v5.h[3] |
| ; CHECK-i64-NEXT: frintx v0.4h, v0.4h |
| ; CHECK-i64-NEXT: mov h7, v5.h[2] |
| ; CHECK-i64-NEXT: mov h16, v5.h[1] |
| ; CHECK-i64-NEXT: frintx v4.4h, v4.4h |
| ; CHECK-i64-NEXT: fcvtzs x12, h5 |
| ; CHECK-i64-NEXT: ext v5.16b, v3.16b, v3.16b, #8 |
| ; CHECK-i64-NEXT: frintx v17.4h, v17.4h |
| ; CHECK-i64-NEXT: frintx v3.4h, v3.4h |
| ; CHECK-i64-NEXT: fcvtzs x9, h6 |
| ; CHECK-i64-NEXT: mov h6, v0.h[3] |
| ; CHECK-i64-NEXT: fcvtzs x10, h7 |
| ; CHECK-i64-NEXT: mov h7, v0.h[2] |
| ; CHECK-i64-NEXT: fcvtzs x11, h16 |
| ; CHECK-i64-NEXT: mov h16, v0.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x13, h6 |
| ; CHECK-i64-NEXT: mov h6, v4.h[3] |
| ; CHECK-i64-NEXT: stp x10, x9, [sp, #48] |
| ; CHECK-i64-NEXT: fcvtzs x9, h7 |
| ; CHECK-i64-NEXT: mov h7, v4.h[2] |
| ; CHECK-i64-NEXT: fcvtzs x10, h16 |
| ; CHECK-i64-NEXT: mov h16, v4.h[1] |
| ; CHECK-i64-NEXT: stp x12, x11, [sp, #32] |
| ; CHECK-i64-NEXT: fcvtzs x11, h0 |
| ; CHECK-i64-NEXT: frintx v0.4h, v5.4h |
| ; CHECK-i64-NEXT: mov h5, v17.h[3] |
| ; CHECK-i64-NEXT: fcvtzs x12, h6 |
| ; CHECK-i64-NEXT: mov h6, v17.h[2] |
| ; CHECK-i64-NEXT: stp x9, x13, [sp, #16] |
| ; CHECK-i64-NEXT: fcvtzs x13, h7 |
| ; CHECK-i64-NEXT: mov h7, v17.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x9, h16 |
| ; CHECK-i64-NEXT: stp x11, x10, [sp] |
| ; CHECK-i64-NEXT: fcvtzs x10, h4 |
| ; CHECK-i64-NEXT: fcvtzs x11, h5 |
| ; CHECK-i64-NEXT: mov h4, v0.h[3] |
| ; CHECK-i64-NEXT: mov h5, v0.h[2] |
| ; CHECK-i64-NEXT: stp x13, x12, [sp, #80] |
| ; CHECK-i64-NEXT: fcvtzs x12, h6 |
| ; CHECK-i64-NEXT: fcvtzs x13, h7 |
| ; CHECK-i64-NEXT: mov h6, v0.h[1] |
| ; CHECK-i64-NEXT: stp x10, x9, [sp, #64] |
| ; CHECK-i64-NEXT: fcvtzs x9, h17 |
| ; CHECK-i64-NEXT: mov h7, v1.h[3] |
| ; CHECK-i64-NEXT: fcvtzs x10, h4 |
| ; CHECK-i64-NEXT: mov h4, v1.h[2] |
| ; CHECK-i64-NEXT: stp x12, x11, [sp, #144] |
| ; CHECK-i64-NEXT: fcvtzs x11, h5 |
| ; CHECK-i64-NEXT: mov h5, v1.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x12, h6 |
| ; CHECK-i64-NEXT: stp x9, x13, [sp, #128] |
| ; CHECK-i64-NEXT: fcvtzs x9, h0 |
| ; CHECK-i64-NEXT: fcvtzs x13, h7 |
| ; CHECK-i64-NEXT: mov h0, v2.h[3] |
| ; CHECK-i64-NEXT: stp x11, x10, [sp, #208] |
| ; CHECK-i64-NEXT: fcvtzs x10, h4 |
| ; CHECK-i64-NEXT: mov h4, v2.h[2] |
| ; CHECK-i64-NEXT: fcvtzs x11, h5 |
| ; CHECK-i64-NEXT: mov h5, v2.h[1] |
| ; CHECK-i64-NEXT: stp x9, x12, [sp, #192] |
| ; CHECK-i64-NEXT: fcvtzs x9, h1 |
| ; CHECK-i64-NEXT: fcvtzs x12, h0 |
| ; CHECK-i64-NEXT: mov h0, v3.h[3] |
| ; CHECK-i64-NEXT: mov h1, v3.h[2] |
| ; CHECK-i64-NEXT: stp x10, x13, [sp, #112] |
| ; CHECK-i64-NEXT: fcvtzs x10, h4 |
| ; CHECK-i64-NEXT: mov h4, v3.h[1] |
| ; CHECK-i64-NEXT: fcvtzs x13, h5 |
| ; CHECK-i64-NEXT: stp x9, x11, [sp, #96] |
| ; CHECK-i64-NEXT: fcvtzs x9, h2 |
| ; CHECK-i64-NEXT: fcvtzs x11, h0 |
| ; CHECK-i64-NEXT: stp x10, x12, [sp, #176] |
| ; CHECK-i64-NEXT: fcvtzs x10, h1 |
| ; CHECK-i64-NEXT: fcvtzs x12, h4 |
| ; CHECK-i64-NEXT: stp x9, x13, [sp, #160] |
| ; CHECK-i64-NEXT: fcvtzs x9, h3 |
| ; CHECK-i64-NEXT: stp x10, x11, [sp, #240] |
| ; CHECK-i64-NEXT: add x10, sp, #64 |
| ; CHECK-i64-NEXT: stp x9, x12, [sp, #224] |
| ; CHECK-i64-NEXT: add x9, sp, #32 |
| ; CHECK-i64-NEXT: ld1d { z0.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: mov x9, sp |
| ; CHECK-i64-NEXT: ld1d { z2.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: ld1d { z1.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #224 |
| ; CHECK-i64-NEXT: add x10, sp, #128 |
| ; CHECK-i64-NEXT: ld1d { z3.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #160 |
| ; CHECK-i64-NEXT: ld1d { z4.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: add x10, sp, #96 |
| ; CHECK-i64-NEXT: ld1d { z5.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #192 |
| ; CHECK-i64-NEXT: ld1d { z6.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: mov x10, #24 // =0x18 |
| ; CHECK-i64-NEXT: ld1d { z7.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: mov x9, #16 // =0x10 |
| ; CHECK-i64-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3] |
| ; CHECK-i64-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #8 // =0x8 |
| ; CHECK-i64-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #28 // =0x1c |
| ; CHECK-i64-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #20 // =0x14 |
| ; CHECK-i64-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #12 // =0xc |
| ; CHECK-i64-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #4 // =0x4 |
| ; CHECK-i64-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: st1d { z0.d }, p0, [x8] |
| ; CHECK-i64-NEXT: mov sp, x29 |
| ; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-i64-NEXT: ret |
| %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x) |
| ret <32 x iXLen> %a |
| } |
| ; Intrinsic declaration for the thirty-two-element half case. |
| declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>) |
| |
| ; lrint on a one-element float vector. The i32 run expects the vector forms of |
| ; frintx and fcvtzs on .2s. The i64 run expects the scalar forms: frintx on |
| ; s0, fcvtzs into x8, then fmov into d0. |
| define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { |
| ; CHECK-i32-LABEL: lrint_v1f32: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx v0.2s, v0.2s |
| ; CHECK-i32-NEXT: fcvtzs v0.2s, v0.2s |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v1f32: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-i64-NEXT: frintx s0, s0 |
| ; CHECK-i64-NEXT: fcvtzs x8, s0 |
| ; CHECK-i64-NEXT: fmov d0, x8 |
| ; CHECK-i64-NEXT: ret |
| %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) |
| ret <1 x iXLen> %a |
| } |
| ; Intrinsic declaration for the one-element float case. |
| declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) |
| |
| ; lrint on a two-element float vector. Both runs expect fully vector code: the |
| ; i32 run uses frintx and fcvtzs on .2s, and the i64 run inserts an fcvtl to |
| ; .2d between the rounding and the conversion. |
| define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { |
| ; CHECK-i32-LABEL: lrint_v2f32: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx v0.2s, v0.2s |
| ; CHECK-i32-NEXT: fcvtzs v0.2s, v0.2s |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v2f32: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: frintx v0.2s, v0.2s |
| ; CHECK-i64-NEXT: fcvtl v0.2d, v0.2s |
| ; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d |
| ; CHECK-i64-NEXT: ret |
| %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) |
| ret <2 x iXLen> %a |
| } |
| ; Intrinsic declaration for the two-element float case. |
| declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) |
| |
| ; lrint on a four-element float vector. The i32 run expects vector frintx and |
| ; fcvtzs on .4s. The i64 run expects a vector frintx followed by per-lane |
| ; scalar fcvtzs, with the converted lanes assembled two at a time into v0 and |
| ; v1. |
| define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { |
| ; CHECK-i32-LABEL: lrint_v4f32: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx v0.4s, v0.4s |
| ; CHECK-i32-NEXT: fcvtzs v0.4s, v0.4s |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v4f32: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: frintx v0.4s, v0.4s |
| ; CHECK-i64-NEXT: mov s1, v0.s[2] |
| ; CHECK-i64-NEXT: mov s2, v0.s[3] |
| ; CHECK-i64-NEXT: mov s3, v0.s[1] |
| ; CHECK-i64-NEXT: fcvtzs x9, s0 |
| ; CHECK-i64-NEXT: fcvtzs x8, s1 |
| ; CHECK-i64-NEXT: fcvtzs x10, s2 |
| ; CHECK-i64-NEXT: fcvtzs x11, s3 |
| ; CHECK-i64-NEXT: fmov d0, x9 |
| ; CHECK-i64-NEXT: fmov d1, x8 |
| ; CHECK-i64-NEXT: mov v0.d[1], x11 |
| ; CHECK-i64-NEXT: mov v1.d[1], x10 |
| ; CHECK-i64-NEXT: ret |
| %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) |
| ret <4 x iXLen> %a |
| } |
| ; Intrinsic declaration for the four-element float case. |
| declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) |
| |
| ; lrint on an eight-element float vector, which arrives in v0 and v1. The i32 |
| ; run joins the two inputs into one SVE register with splice (under |
| ; "ptrue p0.d, vl2"), rounds with a predicated frintx (under |
| ; "ptrue p0.s, vl8"), converts every lane with scalar fcvtzs, and assembles |
| ; the results into v0 and v1. The i64 run stays on NEON for the rounding |
| ; (frintx on each .4s input), converts every lane with scalar fcvtzs, and |
| ; assembles the results into v0-v3. |
| define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { |
| ; CHECK-i32-LABEL: lrint_v8f32: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: ptrue p0.d, vl2 |
| ; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d |
| ; CHECK-i32-NEXT: ptrue p0.s, vl8 |
| ; CHECK-i32-NEXT: movprfx z2, z0 |
| ; CHECK-i32-NEXT: frintx z2.s, p0/m, z0.s |
| ; CHECK-i32-NEXT: mov z0.s, z2.s[4] |
| ; CHECK-i32-NEXT: mov z1.s, z2.s[5] |
| ; CHECK-i32-NEXT: mov z3.s, z2.s[1] |
| ; CHECK-i32-NEXT: fcvtzs w9, s2 |
| ; CHECK-i32-NEXT: fcvtzs w8, s0 |
| ; CHECK-i32-NEXT: mov z0.s, z2.s[6] |
| ; CHECK-i32-NEXT: fcvtzs w10, s1 |
| ; CHECK-i32-NEXT: mov z1.s, z2.s[2] |
| ; CHECK-i32-NEXT: fcvtzs w11, s3 |
| ; CHECK-i32-NEXT: mov z3.s, z2.s[7] |
| ; CHECK-i32-NEXT: mov z2.s, z2.s[3] |
| ; CHECK-i32-NEXT: fcvtzs w12, s0 |
| ; CHECK-i32-NEXT: fmov s0, w9 |
| ; CHECK-i32-NEXT: fcvtzs w13, s1 |
| ; CHECK-i32-NEXT: fmov s1, w8 |
| ; CHECK-i32-NEXT: fcvtzs w8, s3 |
| ; CHECK-i32-NEXT: fcvtzs w9, s2 |
| ; CHECK-i32-NEXT: mov v0.s[1], w11 |
| ; CHECK-i32-NEXT: mov v1.s[1], w10 |
| ; CHECK-i32-NEXT: mov v0.s[2], w13 |
| ; CHECK-i32-NEXT: mov v1.s[2], w12 |
| ; CHECK-i32-NEXT: mov v0.s[3], w9 |
| ; CHECK-i32-NEXT: mov v1.s[3], w8 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v8f32: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: frintx v0.4s, v0.4s |
| ; CHECK-i64-NEXT: frintx v1.4s, v1.4s |
| ; CHECK-i64-NEXT: mov s3, v1.s[2] |
| ; CHECK-i64-NEXT: mov s4, v0.s[2] |
| ; CHECK-i64-NEXT: mov s2, v0.s[1] |
| ; CHECK-i64-NEXT: mov s5, v1.s[3] |
| ; CHECK-i64-NEXT: mov s6, v1.s[1] |
| ; CHECK-i64-NEXT: mov s7, v0.s[3] |
| ; CHECK-i64-NEXT: fcvtzs x8, s0 |
| ; CHECK-i64-NEXT: fcvtzs x10, s1 |
| ; CHECK-i64-NEXT: fcvtzs x11, s3 |
| ; CHECK-i64-NEXT: fcvtzs x12, s4 |
| ; CHECK-i64-NEXT: fcvtzs x9, s2 |
| ; CHECK-i64-NEXT: fcvtzs x13, s5 |
| ; CHECK-i64-NEXT: fcvtzs x14, s6 |
| ; CHECK-i64-NEXT: fcvtzs x15, s7 |
| ; CHECK-i64-NEXT: fmov d0, x8 |
| ; CHECK-i64-NEXT: fmov d2, x10 |
| ; CHECK-i64-NEXT: fmov d1, x12 |
| ; CHECK-i64-NEXT: fmov d3, x11 |
| ; CHECK-i64-NEXT: mov v0.d[1], x9 |
| ; CHECK-i64-NEXT: mov v2.d[1], x14 |
| ; CHECK-i64-NEXT: mov v1.d[1], x15 |
| ; CHECK-i64-NEXT: mov v3.d[1], x13 |
| ; CHECK-i64-NEXT: ret |
| %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) |
| ret <8 x iXLen> %a |
| } |
| ; Intrinsic declaration for the eight-element float case. |
| declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) |
| |
| ; lrint on a sixteen-element float vector, which arrives in v0-v3. The i32 run |
| ; joins the inputs pairwise into two SVE registers with splice, rounds each |
| ; with a predicated frintx (under "ptrue p0.s, vl8"), converts every lane with |
| ; scalar fcvtzs, and assembles the results into v0-v3. The i64 run rounds each |
| ; .4s input with NEON frintx, converts every lane with scalar fcvtzs, and |
| ; assembles the results into v0-v7. |
| define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { |
| ; CHECK-i32-LABEL: lrint_v16f32: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: ptrue p0.d, vl2 |
| ; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 |
| ; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i32-NEXT: splice z2.d, p0, z2.d, z3.d |
| ; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d |
| ; CHECK-i32-NEXT: ptrue p0.s, vl8 |
| ; CHECK-i32-NEXT: movprfx z1, z2 |
| ; CHECK-i32-NEXT: frintx z1.s, p0/m, z2.s |
| ; CHECK-i32-NEXT: frintx z0.s, p0/m, z0.s |
| ; CHECK-i32-NEXT: mov z2.s, z1.s[5] |
| ; CHECK-i32-NEXT: mov z3.s, z1.s[4] |
| ; CHECK-i32-NEXT: mov z5.s, z0.s[5] |
| ; CHECK-i32-NEXT: mov z7.s, z0.s[1] |
| ; CHECK-i32-NEXT: fcvtzs w11, s0 |
| ; CHECK-i32-NEXT: fcvtzs w13, s1 |
| ; CHECK-i32-NEXT: mov z4.s, z1.s[7] |
| ; CHECK-i32-NEXT: mov z6.s, z1.s[6] |
| ; CHECK-i32-NEXT: mov z16.s, z0.s[7] |
| ; CHECK-i32-NEXT: fcvtzs w8, s2 |
| ; CHECK-i32-NEXT: mov z2.s, z0.s[4] |
| ; CHECK-i32-NEXT: fcvtzs w9, s3 |
| ; CHECK-i32-NEXT: mov z3.s, z1.s[1] |
| ; CHECK-i32-NEXT: fcvtzs w10, s5 |
| ; CHECK-i32-NEXT: fcvtzs w12, s7 |
| ; CHECK-i32-NEXT: mov z5.s, z0.s[6] |
| ; CHECK-i32-NEXT: mov z7.s, z1.s[2] |
| ; CHECK-i32-NEXT: mov z17.s, z1.s[3] |
| ; CHECK-i32-NEXT: fcvtzs w14, s2 |
| ; CHECK-i32-NEXT: mov z2.s, z0.s[2] |
| ; CHECK-i32-NEXT: mov z18.s, z0.s[3] |
| ; CHECK-i32-NEXT: fcvtzs w15, s3 |
| ; CHECK-i32-NEXT: fmov s0, w11 |
| ; CHECK-i32-NEXT: fmov s3, w9 |
| ; CHECK-i32-NEXT: fcvtzs w16, s6 |
| ; CHECK-i32-NEXT: fcvtzs w17, s5 |
| ; CHECK-i32-NEXT: fcvtzs w11, s7 |
| ; CHECK-i32-NEXT: fcvtzs w18, s2 |
| ; CHECK-i32-NEXT: fmov s2, w13 |
| ; CHECK-i32-NEXT: fcvtzs w9, s16 |
| ; CHECK-i32-NEXT: fmov s1, w14 |
| ; CHECK-i32-NEXT: mov v0.s[1], w12 |
| ; CHECK-i32-NEXT: mov v3.s[1], w8 |
| ; CHECK-i32-NEXT: fcvtzs w8, s4 |
| ; CHECK-i32-NEXT: fcvtzs w12, s18 |
| ; CHECK-i32-NEXT: mov v2.s[1], w15 |
| ; CHECK-i32-NEXT: mov v1.s[1], w10 |
| ; CHECK-i32-NEXT: fcvtzs w10, s17 |
| ; CHECK-i32-NEXT: mov v0.s[2], w18 |
| ; CHECK-i32-NEXT: mov v3.s[2], w16 |
| ; CHECK-i32-NEXT: mov v2.s[2], w11 |
| ; CHECK-i32-NEXT: mov v1.s[2], w17 |
| ; CHECK-i32-NEXT: mov v0.s[3], w12 |
| ; CHECK-i32-NEXT: mov v3.s[3], w8 |
| ; CHECK-i32-NEXT: mov v2.s[3], w10 |
| ; CHECK-i32-NEXT: mov v1.s[3], w9 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v16f32: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: frintx v3.4s, v3.4s |
| ; CHECK-i64-NEXT: frintx v2.4s, v2.4s |
| ; CHECK-i64-NEXT: frintx v1.4s, v1.4s |
| ; CHECK-i64-NEXT: frintx v0.4s, v0.4s |
| ; CHECK-i64-NEXT: mov s4, v3.s[2] |
| ; CHECK-i64-NEXT: mov s5, v2.s[2] |
| ; CHECK-i64-NEXT: mov s6, v1.s[2] |
| ; CHECK-i64-NEXT: mov s7, v0.s[2] |
| ; CHECK-i64-NEXT: fcvtzs x10, s1 |
| ; CHECK-i64-NEXT: fcvtzs x11, s0 |
| ; CHECK-i64-NEXT: mov s16, v0.s[1] |
| ; CHECK-i64-NEXT: mov s17, v1.s[1] |
| ; CHECK-i64-NEXT: mov s18, v3.s[1] |
| ; CHECK-i64-NEXT: fcvtzs x14, s3 |
| ; CHECK-i64-NEXT: fcvtzs x16, s2 |
| ; CHECK-i64-NEXT: fcvtzs x8, s4 |
| ; CHECK-i64-NEXT: mov s4, v2.s[1] |
| ; CHECK-i64-NEXT: fcvtzs x9, s5 |
| ; CHECK-i64-NEXT: mov s5, v1.s[3] |
| ; CHECK-i64-NEXT: fcvtzs x12, s6 |
| ; CHECK-i64-NEXT: mov s6, v0.s[3] |
| ; CHECK-i64-NEXT: fcvtzs x13, s7 |
| ; CHECK-i64-NEXT: mov s7, v3.s[3] |
| ; CHECK-i64-NEXT: fmov d0, x11 |
| ; CHECK-i64-NEXT: fcvtzs x17, s16 |
| ; CHECK-i64-NEXT: fcvtzs x18, s18 |
| ; CHECK-i64-NEXT: fcvtzs x15, s4 |
| ; CHECK-i64-NEXT: mov s4, v2.s[3] |
| ; CHECK-i64-NEXT: fmov d2, x10 |
| ; CHECK-i64-NEXT: fcvtzs x11, s5 |
| ; CHECK-i64-NEXT: fcvtzs x10, s6 |
| ; CHECK-i64-NEXT: fmov d3, x12 |
| ; CHECK-i64-NEXT: fmov d1, x13 |
| ; CHECK-i64-NEXT: fcvtzs x12, s17 |
| ; CHECK-i64-NEXT: fcvtzs x13, s7 |
| ; CHECK-i64-NEXT: fmov d5, x9 |
| ; CHECK-i64-NEXT: fmov d6, x14 |
| ; CHECK-i64-NEXT: fmov d7, x8 |
| ; CHECK-i64-NEXT: fcvtzs x0, s4 |
| ; CHECK-i64-NEXT: fmov d4, x16 |
| ; CHECK-i64-NEXT: mov v0.d[1], x17 |
| ; CHECK-i64-NEXT: mov v1.d[1], x10 |
| ; CHECK-i64-NEXT: mov v3.d[1], x11 |
| ; CHECK-i64-NEXT: mov v2.d[1], x12 |
| ; CHECK-i64-NEXT: mov v6.d[1], x18 |
| ; CHECK-i64-NEXT: mov v7.d[1], x13 |
| ; CHECK-i64-NEXT: mov v4.d[1], x15 |
| ; CHECK-i64-NEXT: mov v5.d[1], x0 |
| ; CHECK-i64-NEXT: ret |
| %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) |
| ret <16 x iXLen> %a |
| } |
| ; Intrinsic declaration for the sixteen-element float case. |
| declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) |
| |
| define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { |
| ; CHECK-i32-LABEL: lrint_v32f32: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: str x27, [sp, #-80]! // 8-byte Folded Spill |
| ; CHECK-i32-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-i32-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-i32-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-i32-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-i32-NEXT: .cfi_def_cfa_offset 80 |
| ; CHECK-i32-NEXT: .cfi_offset w19, -8 |
| ; CHECK-i32-NEXT: .cfi_offset w20, -16 |
| ; CHECK-i32-NEXT: .cfi_offset w21, -24 |
| ; CHECK-i32-NEXT: .cfi_offset w22, -32 |
| ; CHECK-i32-NEXT: .cfi_offset w23, -40 |
| ; CHECK-i32-NEXT: .cfi_offset w24, -48 |
| ; CHECK-i32-NEXT: .cfi_offset w25, -56 |
| ; CHECK-i32-NEXT: .cfi_offset w26, -64 |
| ; CHECK-i32-NEXT: .cfi_offset w27, -80 |
| ; CHECK-i32-NEXT: ptrue p1.d, vl2 |
| ; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6 |
| ; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7 |
| ; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-i32-NEXT: // kill: def $q4 killed $q4 def $z4 |
| ; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 |
| ; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5 |
| ; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i32-NEXT: ptrue p0.s, vl8 |
| ; CHECK-i32-NEXT: splice z6.d, p1, z6.d, z7.d |
| ; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d |
| ; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d |
| ; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d |
| ; CHECK-i32-NEXT: movprfx z3, z6 |
| ; CHECK-i32-NEXT: frintx z3.s, p0/m, z6.s |
| ; CHECK-i32-NEXT: frintx z2.s, p0/m, z2.s |
| ; CHECK-i32-NEXT: movprfx z1, z4 |
| ; CHECK-i32-NEXT: frintx z1.s, p0/m, z4.s |
| ; CHECK-i32-NEXT: frintx z0.s, p0/m, z0.s |
| ; CHECK-i32-NEXT: mov z4.s, z3.s[7] |
| ; CHECK-i32-NEXT: mov z5.s, z3.s[6] |
| ; CHECK-i32-NEXT: mov z6.s, z3.s[5] |
| ; CHECK-i32-NEXT: mov z16.s, z1.s[7] |
| ; CHECK-i32-NEXT: mov z7.s, z3.s[4] |
| ; CHECK-i32-NEXT: mov z17.s, z1.s[6] |
| ; CHECK-i32-NEXT: mov z18.s, z1.s[5] |
| ; CHECK-i32-NEXT: mov z19.s, z1.s[4] |
| ; CHECK-i32-NEXT: fcvtzs w7, s3 |
| ; CHECK-i32-NEXT: fcvtzs w8, s4 |
| ; CHECK-i32-NEXT: mov z4.s, z2.s[7] |
| ; CHECK-i32-NEXT: fcvtzs w10, s5 |
| ; CHECK-i32-NEXT: mov z5.s, z2.s[6] |
| ; CHECK-i32-NEXT: fcvtzs w13, s6 |
| ; CHECK-i32-NEXT: fcvtzs w9, s16 |
| ; CHECK-i32-NEXT: mov z6.s, z2.s[4] |
| ; CHECK-i32-NEXT: mov z16.s, z0.s[6] |
| ; CHECK-i32-NEXT: fcvtzs w14, s7 |
| ; CHECK-i32-NEXT: fcvtzs w11, s4 |
| ; CHECK-i32-NEXT: mov z4.s, z2.s[5] |
| ; CHECK-i32-NEXT: mov z7.s, z0.s[7] |
| ; CHECK-i32-NEXT: fcvtzs w16, s5 |
| ; CHECK-i32-NEXT: mov z5.s, z0.s[4] |
| ; CHECK-i32-NEXT: fcvtzs w12, s17 |
| ; CHECK-i32-NEXT: fcvtzs w15, s18 |
| ; CHECK-i32-NEXT: fcvtzs w17, s19 |
| ; CHECK-i32-NEXT: mov z17.s, z0.s[5] |
| ; CHECK-i32-NEXT: fcvtzs w3, s4 |
| ; CHECK-i32-NEXT: mov z4.s, z3.s[1] |
| ; CHECK-i32-NEXT: mov z18.s, z3.s[2] |
| ; CHECK-i32-NEXT: fcvtzs w4, s6 |
| ; CHECK-i32-NEXT: fcvtzs w0, s16 |
| ; CHECK-i32-NEXT: fcvtzs w6, s5 |
| ; CHECK-i32-NEXT: mov z16.s, z3.s[3] |
| ; CHECK-i32-NEXT: mov z3.s, z0.s[1] |
| ; CHECK-i32-NEXT: mov z5.s, z1.s[1] |
| ; CHECK-i32-NEXT: mov z6.s, z2.s[1] |
| ; CHECK-i32-NEXT: fcvtzs w21, s1 |
| ; CHECK-i32-NEXT: fcvtzs w22, s0 |
| ; CHECK-i32-NEXT: fcvtzs w23, s2 |
| ; CHECK-i32-NEXT: fcvtzs w18, s7 |
| ; CHECK-i32-NEXT: fcvtzs w2, s4 |
| ; CHECK-i32-NEXT: mov z4.s, z1.s[2] |
| ; CHECK-i32-NEXT: mov z7.s, z2.s[2] |
| ; CHECK-i32-NEXT: fcvtzs w5, s17 |
| ; CHECK-i32-NEXT: fcvtzs w24, s3 |
| ; CHECK-i32-NEXT: fcvtzs w25, s5 |
| ; CHECK-i32-NEXT: fcvtzs w26, s6 |
| ; CHECK-i32-NEXT: fcvtzs w1, s18 |
| ; CHECK-i32-NEXT: mov z18.s, z0.s[2] |
| ; CHECK-i32-NEXT: mov z17.s, z1.s[3] |
| ; CHECK-i32-NEXT: fcvtzs w19, s4 |
| ; CHECK-i32-NEXT: mov z19.s, z2.s[3] |
| ; CHECK-i32-NEXT: fcvtzs w20, s7 |
| ; CHECK-i32-NEXT: mov z20.s, z0.s[3] |
| ; CHECK-i32-NEXT: fmov s0, w22 |
| ; CHECK-i32-NEXT: fmov s2, w23 |
| ; CHECK-i32-NEXT: fmov s4, w21 |
| ; CHECK-i32-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-i32-NEXT: fmov s1, w6 |
| ; CHECK-i32-NEXT: fmov s6, w7 |
| ; CHECK-i32-NEXT: fmov s3, w4 |
| ; CHECK-i32-NEXT: fmov s5, w17 |
| ; CHECK-i32-NEXT: fmov s7, w14 |
| ; CHECK-i32-NEXT: fcvtzs w27, s18 |
| ; CHECK-i32-NEXT: mov v0.s[1], w24 |
| ; CHECK-i32-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-i32-NEXT: mov v2.s[1], w26 |
| ; CHECK-i32-NEXT: mov v4.s[1], w25 |
| ; CHECK-i32-NEXT: mov v1.s[1], w5 |
| ; CHECK-i32-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-i32-NEXT: mov v3.s[1], w3 |
| ; CHECK-i32-NEXT: mov v6.s[1], w2 |
| ; CHECK-i32-NEXT: mov v5.s[1], w15 |
| ; CHECK-i32-NEXT: mov v7.s[1], w13 |
| ; CHECK-i32-NEXT: fcvtzs w13, s16 |
| ; CHECK-i32-NEXT: fcvtzs w14, s17 |
| ; CHECK-i32-NEXT: fcvtzs w15, s19 |
| ; CHECK-i32-NEXT: fcvtzs w17, s20 |
| ; CHECK-i32-NEXT: mov v0.s[2], w27 |
| ; CHECK-i32-NEXT: mov v1.s[2], w0 |
| ; CHECK-i32-NEXT: mov v2.s[2], w20 |
| ; CHECK-i32-NEXT: mov v4.s[2], w19 |
| ; CHECK-i32-NEXT: mov v3.s[2], w16 |
| ; CHECK-i32-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-i32-NEXT: mov v6.s[2], w1 |
| ; CHECK-i32-NEXT: mov v5.s[2], w12 |
| ; CHECK-i32-NEXT: mov v7.s[2], w10 |
| ; CHECK-i32-NEXT: mov v0.s[3], w17 |
| ; CHECK-i32-NEXT: mov v1.s[3], w18 |
| ; CHECK-i32-NEXT: mov v2.s[3], w15 |
| ; CHECK-i32-NEXT: mov v4.s[3], w14 |
| ; CHECK-i32-NEXT: mov v3.s[3], w11 |
| ; CHECK-i32-NEXT: mov v6.s[3], w13 |
| ; CHECK-i32-NEXT: mov v5.s[3], w9 |
| ; CHECK-i32-NEXT: mov v7.s[3], w8 |
| ; CHECK-i32-NEXT: ldr x27, [sp], #80 // 8-byte Folded Reload |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v32f32: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-i64-NEXT: sub x9, sp, #272 |
| ; CHECK-i64-NEXT: mov x29, sp |
| ; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 |
| ; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-i64-NEXT: .cfi_offset w30, -8 |
| ; CHECK-i64-NEXT: .cfi_offset w29, -16 |
| ; CHECK-i64-NEXT: frintx v0.4s, v0.4s |
| ; CHECK-i64-NEXT: frintx v1.4s, v1.4s |
| ; CHECK-i64-NEXT: frintx v2.4s, v2.4s |
| ; CHECK-i64-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i64-NEXT: mov s16, v0.s[3] |
| ; CHECK-i64-NEXT: mov s17, v0.s[2] |
| ; CHECK-i64-NEXT: mov s18, v0.s[1] |
| ; CHECK-i64-NEXT: fcvtzs x12, s0 |
| ; CHECK-i64-NEXT: frintx v0.4s, v3.4s |
| ; CHECK-i64-NEXT: mov s3, v2.s[3] |
| ; CHECK-i64-NEXT: fcvtzs x9, s16 |
| ; CHECK-i64-NEXT: mov s16, v1.s[3] |
| ; CHECK-i64-NEXT: fcvtzs x10, s17 |
| ; CHECK-i64-NEXT: mov s17, v1.s[2] |
| ; CHECK-i64-NEXT: fcvtzs x11, s18 |
| ; CHECK-i64-NEXT: mov s18, v1.s[1] |
| ; CHECK-i64-NEXT: fcvtzs x13, s16 |
| ; CHECK-i64-NEXT: stp x10, x9, [sp, #16] |
| ; CHECK-i64-NEXT: mov s16, v2.s[2] |
| ; CHECK-i64-NEXT: fcvtzs x9, s17 |
| ; CHECK-i64-NEXT: fcvtzs x10, s18 |
| ; CHECK-i64-NEXT: mov s17, v2.s[1] |
| ; CHECK-i64-NEXT: stp x12, x11, [sp] |
| ; CHECK-i64-NEXT: fcvtzs x11, s1 |
| ; CHECK-i64-NEXT: frintx v1.4s, v4.4s |
| ; CHECK-i64-NEXT: fcvtzs x12, s3 |
| ; CHECK-i64-NEXT: mov s3, v0.s[3] |
| ; CHECK-i64-NEXT: mov s4, v0.s[2] |
| ; CHECK-i64-NEXT: stp x9, x13, [sp, #48] |
| ; CHECK-i64-NEXT: fcvtzs x13, s16 |
| ; CHECK-i64-NEXT: fcvtzs x9, s17 |
| ; CHECK-i64-NEXT: mov s16, v0.s[1] |
| ; CHECK-i64-NEXT: stp x11, x10, [sp, #32] |
| ; CHECK-i64-NEXT: fcvtzs x10, s2 |
| ; CHECK-i64-NEXT: frintx v2.4s, v5.4s |
| ; CHECK-i64-NEXT: fcvtzs x11, s3 |
| ; CHECK-i64-NEXT: mov s3, v1.s[3] |
| ; CHECK-i64-NEXT: mov s5, v1.s[1] |
| ; CHECK-i64-NEXT: stp x13, x12, [sp, #80] |
| ; CHECK-i64-NEXT: fcvtzs x12, s4 |
| ; CHECK-i64-NEXT: mov s4, v1.s[2] |
| ; CHECK-i64-NEXT: fcvtzs x13, s16 |
| ; CHECK-i64-NEXT: stp x10, x9, [sp, #64] |
| ; CHECK-i64-NEXT: fcvtzs x9, s0 |
| ; CHECK-i64-NEXT: mov s0, v2.s[3] |
| ; CHECK-i64-NEXT: fcvtzs x10, s3 |
| ; CHECK-i64-NEXT: frintx v3.4s, v6.4s |
| ; CHECK-i64-NEXT: stp x12, x11, [sp, #112] |
| ; CHECK-i64-NEXT: fcvtzs x11, s4 |
| ; CHECK-i64-NEXT: mov s4, v2.s[2] |
| ; CHECK-i64-NEXT: fcvtzs x12, s5 |
| ; CHECK-i64-NEXT: mov s5, v2.s[1] |
| ; CHECK-i64-NEXT: stp x9, x13, [sp, #96] |
| ; CHECK-i64-NEXT: fcvtzs x9, s1 |
| ; CHECK-i64-NEXT: fcvtzs x13, s0 |
| ; CHECK-i64-NEXT: mov s0, v3.s[3] |
| ; CHECK-i64-NEXT: frintx v1.4s, v7.4s |
| ; CHECK-i64-NEXT: stp x11, x10, [sp, #144] |
| ; CHECK-i64-NEXT: fcvtzs x10, s4 |
| ; CHECK-i64-NEXT: mov s4, v3.s[2] |
| ; CHECK-i64-NEXT: fcvtzs x11, s5 |
| ; CHECK-i64-NEXT: mov s5, v3.s[1] |
| ; CHECK-i64-NEXT: stp x9, x12, [sp, #128] |
| ; CHECK-i64-NEXT: fcvtzs x9, s2 |
| ; CHECK-i64-NEXT: fcvtzs x12, s0 |
| ; CHECK-i64-NEXT: mov s0, v1.s[3] |
| ; CHECK-i64-NEXT: mov s2, v1.s[2] |
| ; CHECK-i64-NEXT: stp x10, x13, [sp, #176] |
| ; CHECK-i64-NEXT: fcvtzs x10, s4 |
| ; CHECK-i64-NEXT: mov s4, v1.s[1] |
| ; CHECK-i64-NEXT: fcvtzs x13, s5 |
| ; CHECK-i64-NEXT: stp x9, x11, [sp, #160] |
| ; CHECK-i64-NEXT: fcvtzs x9, s3 |
| ; CHECK-i64-NEXT: fcvtzs x11, s0 |
| ; CHECK-i64-NEXT: stp x10, x12, [sp, #208] |
| ; CHECK-i64-NEXT: fcvtzs x10, s2 |
| ; CHECK-i64-NEXT: fcvtzs x12, s4 |
| ; CHECK-i64-NEXT: stp x9, x13, [sp, #192] |
| ; CHECK-i64-NEXT: fcvtzs x9, s1 |
| ; CHECK-i64-NEXT: stp x10, x11, [sp, #240] |
| ; CHECK-i64-NEXT: add x10, sp, #64 |
| ; CHECK-i64-NEXT: stp x9, x12, [sp, #224] |
| ; CHECK-i64-NEXT: mov x9, sp |
| ; CHECK-i64-NEXT: ld1d { z0.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #32 |
| ; CHECK-i64-NEXT: ld1d { z2.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: ld1d { z1.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #224 |
| ; CHECK-i64-NEXT: add x10, sp, #96 |
| ; CHECK-i64-NEXT: ld1d { z3.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #192 |
| ; CHECK-i64-NEXT: ld1d { z4.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: add x10, sp, #160 |
| ; CHECK-i64-NEXT: ld1d { z5.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #128 |
| ; CHECK-i64-NEXT: ld1d { z6.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: mov x10, #28 // =0x1c |
| ; CHECK-i64-NEXT: ld1d { z7.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: mov x9, #24 // =0x18 |
| ; CHECK-i64-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3] |
| ; CHECK-i64-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #20 // =0x14 |
| ; CHECK-i64-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #16 // =0x10 |
| ; CHECK-i64-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #12 // =0xc |
| ; CHECK-i64-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #8 // =0x8 |
| ; CHECK-i64-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #4 // =0x4 |
| ; CHECK-i64-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: st1d { z0.d }, p0, [x8] |
| ; CHECK-i64-NEXT: mov sp, x29 |
| ; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-i64-NEXT: ret |
| %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x) |
| ret <32 x iXLen> %a |
| } |
| ; Declaration of the overloaded llvm.lrint intrinsic called by lrint_v32f32. |
| ; iXLen is rewritten to i32 or i64 by the sed step in this file's RUN lines. |
| declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>) |
| |
| ; lrint on a <1 x double> input, returning <1 x iXLen>. |
| ; Both the i32 and the i64 run expect a scalar frintx followed by a scalar |
| ; fcvtzs into a general-purpose register and an fmov back into register 0. |
| ; The assertions below are autogenerated; regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { |
| ; CHECK-i32-LABEL: lrint_v1f64: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx d0, d0 |
| ; CHECK-i32-NEXT: fcvtzs w8, d0 |
| ; CHECK-i32-NEXT: fmov s0, w8 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v1f64: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: frintx d0, d0 |
| ; CHECK-i64-NEXT: fcvtzs x8, d0 |
| ; CHECK-i64-NEXT: fmov d0, x8 |
| ; CHECK-i64-NEXT: ret |
| %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) |
| ret <1 x iXLen> %a |
| } |
| declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) |
| |
| ; lrint on a <2 x double> input, returning <2 x iXLen>. |
| ; The i64 run expects a fully vectorised sequence (vector frintx, then vector |
| ; fcvtzs). The i32 run expects the vector frintx followed by a per-lane scalar |
| ; fcvtzs into w registers, with the results inserted into v0. |
| ; The assertions below are autogenerated; regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { |
| ; CHECK-i32-LABEL: lrint_v2f64: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: frintx v0.2d, v0.2d |
| ; CHECK-i32-NEXT: mov d1, v0.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w8, d0 |
| ; CHECK-i32-NEXT: fcvtzs w9, d1 |
| ; CHECK-i32-NEXT: fmov s0, w8 |
| ; CHECK-i32-NEXT: mov v0.s[1], w9 |
| ; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v2f64: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: frintx v0.2d, v0.2d |
| ; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d |
| ; CHECK-i64-NEXT: ret |
| %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x) |
| ret <2 x iXLen> %a |
| } |
| declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) |
| |
| ; lrint on a <4 x double> input, returning <4 x iXLen>. |
| ; In both runs the two incoming q registers are spliced into a single SVE |
| ; register (ptrue vl2 + splice) and rounded with one predicated frintx under |
| ; ptrue vl4. Each lane is then moved out and converted with a scalar fcvtzs. |
| ; The i32 run packs the four results into v0; the i64 run builds v0 and v1. |
| ; The assertions below are autogenerated; regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { |
| ; CHECK-i32-LABEL: lrint_v4f64: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: ptrue p0.d, vl2 |
| ; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d |
| ; CHECK-i32-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i32-NEXT: movprfx z1, z0 |
| ; CHECK-i32-NEXT: frintx z1.d, p0/m, z0.d |
| ; CHECK-i32-NEXT: mov z0.d, z1.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w8, d1 |
| ; CHECK-i32-NEXT: mov z2.d, z1.d[2] |
| ; CHECK-i32-NEXT: mov z1.d, z1.d[3] |
| ; CHECK-i32-NEXT: fcvtzs w9, d0 |
| ; CHECK-i32-NEXT: fmov s0, w8 |
| ; CHECK-i32-NEXT: fcvtzs w8, d2 |
| ; CHECK-i32-NEXT: mov v0.s[1], w9 |
| ; CHECK-i32-NEXT: mov v0.s[2], w8 |
| ; CHECK-i32-NEXT: fcvtzs w8, d1 |
| ; CHECK-i32-NEXT: mov v0.s[3], w8 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v4f64: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: ptrue p0.d, vl2 |
| ; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d |
| ; CHECK-i64-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d |
| ; CHECK-i64-NEXT: mov z1.d, z0.d[2] |
| ; CHECK-i64-NEXT: mov z2.d, z0.d[3] |
| ; CHECK-i64-NEXT: mov z3.d, z0.d[1] |
| ; CHECK-i64-NEXT: fcvtzs x9, d0 |
| ; CHECK-i64-NEXT: fcvtzs x8, d1 |
| ; CHECK-i64-NEXT: fcvtzs x10, d2 |
| ; CHECK-i64-NEXT: fcvtzs x11, d3 |
| ; CHECK-i64-NEXT: fmov d0, x9 |
| ; CHECK-i64-NEXT: fmov d1, x8 |
| ; CHECK-i64-NEXT: mov v0.d[1], x11 |
| ; CHECK-i64-NEXT: mov v1.d[1], x10 |
| ; CHECK-i64-NEXT: ret |
| %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x) |
| ret <4 x iXLen> %a |
| } |
| declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) |
| |
| ; lrint on an <8 x double> input, returning <8 x iXLen>. |
| ; In both runs the four incoming q registers are spliced pairwise into two SVE |
| ; registers (ptrue vl2 + splice) and rounded with predicated frintx under |
| ; ptrue vl4. Each lane is then moved out and converted with a scalar fcvtzs. |
| ; The i32 run packs the results into v0 and v1; the i64 run builds v0-v3. |
| ; The assertions below are autogenerated; regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { |
| ; CHECK-i32-LABEL: lrint_v8f64: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: ptrue p0.d, vl2 |
| ; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 |
| ; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d |
| ; CHECK-i32-NEXT: splice z2.d, p0, z2.d, z3.d |
| ; CHECK-i32-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i32-NEXT: movprfx z3, z0 |
| ; CHECK-i32-NEXT: frintx z3.d, p0/m, z0.d |
| ; CHECK-i32-NEXT: frintx z2.d, p0/m, z2.d |
| ; CHECK-i32-NEXT: mov z0.d, z3.d[1] |
| ; CHECK-i32-NEXT: mov z1.d, z2.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w8, d3 |
| ; CHECK-i32-NEXT: fcvtzs w9, d2 |
| ; CHECK-i32-NEXT: mov z4.d, z3.d[2] |
| ; CHECK-i32-NEXT: mov z5.d, z2.d[2] |
| ; CHECK-i32-NEXT: mov z3.d, z3.d[3] |
| ; CHECK-i32-NEXT: mov z2.d, z2.d[3] |
| ; CHECK-i32-NEXT: fcvtzs w10, d0 |
| ; CHECK-i32-NEXT: fcvtzs w11, d1 |
| ; CHECK-i32-NEXT: fmov s0, w8 |
| ; CHECK-i32-NEXT: fcvtzs w8, d4 |
| ; CHECK-i32-NEXT: fmov s1, w9 |
| ; CHECK-i32-NEXT: fcvtzs w9, d5 |
| ; CHECK-i32-NEXT: mov v0.s[1], w10 |
| ; CHECK-i32-NEXT: mov v1.s[1], w11 |
| ; CHECK-i32-NEXT: mov v0.s[2], w8 |
| ; CHECK-i32-NEXT: fcvtzs w8, d3 |
| ; CHECK-i32-NEXT: mov v1.s[2], w9 |
| ; CHECK-i32-NEXT: fcvtzs w9, d2 |
| ; CHECK-i32-NEXT: mov v0.s[3], w8 |
| ; CHECK-i32-NEXT: mov v1.s[3], w9 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v8f64: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: ptrue p0.d, vl2 |
| ; CHECK-i64-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i64-NEXT: // kill: def $q3 killed $q3 def $z3 |
| ; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d |
| ; CHECK-i64-NEXT: splice z2.d, p0, z2.d, z3.d |
| ; CHECK-i64-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d |
| ; CHECK-i64-NEXT: movprfx z1, z2 |
| ; CHECK-i64-NEXT: frintx z1.d, p0/m, z2.d |
| ; CHECK-i64-NEXT: mov z4.d, z1.d[2] |
| ; CHECK-i64-NEXT: mov z5.d, z0.d[2] |
| ; CHECK-i64-NEXT: mov z2.d, z0.d[1] |
| ; CHECK-i64-NEXT: mov z3.d, z1.d[3] |
| ; CHECK-i64-NEXT: mov z6.d, z0.d[3] |
| ; CHECK-i64-NEXT: fcvtzs x8, d0 |
| ; CHECK-i64-NEXT: mov z0.d, z1.d[1] |
| ; CHECK-i64-NEXT: fcvtzs x10, d1 |
| ; CHECK-i64-NEXT: fcvtzs x11, d4 |
| ; CHECK-i64-NEXT: fcvtzs x12, d5 |
| ; CHECK-i64-NEXT: fcvtzs x9, d2 |
| ; CHECK-i64-NEXT: fcvtzs x13, d3 |
| ; CHECK-i64-NEXT: fcvtzs x14, d6 |
| ; CHECK-i64-NEXT: fcvtzs x15, d0 |
| ; CHECK-i64-NEXT: fmov d0, x8 |
| ; CHECK-i64-NEXT: fmov d2, x10 |
| ; CHECK-i64-NEXT: fmov d1, x12 |
| ; CHECK-i64-NEXT: fmov d3, x11 |
| ; CHECK-i64-NEXT: mov v0.d[1], x9 |
| ; CHECK-i64-NEXT: mov v2.d[1], x15 |
| ; CHECK-i64-NEXT: mov v1.d[1], x14 |
| ; CHECK-i64-NEXT: mov v3.d[1], x13 |
| ; CHECK-i64-NEXT: ret |
| %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x) |
| ret <8 x iXLen> %a |
| } |
| declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) |
| |
| ; lrint on a <16 x double> input, returning <16 x iXLen>. |
| ; In both runs the eight incoming q registers are spliced pairwise into four |
| ; SVE registers (ptrue vl2 + splice) and rounded with predicated frintx under |
| ; ptrue vl4. Each lane is then moved out and converted with a scalar fcvtzs. |
| ; The i32 run packs the results into v0-v3; the i64 run builds v0-v7. |
| ; The assertions below are autogenerated; regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { |
| ; CHECK-i32-LABEL: lrint_v16f64: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: ptrue p1.d, vl2 |
| ; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6 |
| ; CHECK-i32-NEXT: // kill: def $q4 killed $q4 def $z4 |
| ; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7 |
| ; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5 |
| ; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 |
| ; CHECK-i32-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d |
| ; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d |
| ; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d |
| ; CHECK-i32-NEXT: splice z6.d, p1, z6.d, z7.d |
| ; CHECK-i32-NEXT: movprfx z5, z0 |
| ; CHECK-i32-NEXT: frintx z5.d, p0/m, z0.d |
| ; CHECK-i32-NEXT: movprfx z7, z2 |
| ; CHECK-i32-NEXT: frintx z7.d, p0/m, z2.d |
| ; CHECK-i32-NEXT: frintx z4.d, p0/m, z4.d |
| ; CHECK-i32-NEXT: frintx z6.d, p0/m, z6.d |
| ; CHECK-i32-NEXT: fcvtzs w8, d5 |
| ; CHECK-i32-NEXT: mov z0.d, z5.d[1] |
| ; CHECK-i32-NEXT: mov z1.d, z7.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w9, d7 |
| ; CHECK-i32-NEXT: mov z3.d, z4.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w10, d4 |
| ; CHECK-i32-NEXT: mov z16.d, z6.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w12, d6 |
| ; CHECK-i32-NEXT: mov z2.d, z5.d[2] |
| ; CHECK-i32-NEXT: fcvtzs w11, d0 |
| ; CHECK-i32-NEXT: fcvtzs w13, d1 |
| ; CHECK-i32-NEXT: mov z17.d, z7.d[2] |
| ; CHECK-i32-NEXT: fcvtzs w14, d3 |
| ; CHECK-i32-NEXT: fmov s0, w8 |
| ; CHECK-i32-NEXT: mov z18.d, z4.d[2] |
| ; CHECK-i32-NEXT: fcvtzs w8, d16 |
| ; CHECK-i32-NEXT: mov z19.d, z6.d[2] |
| ; CHECK-i32-NEXT: fcvtzs w15, d2 |
| ; CHECK-i32-NEXT: fmov s1, w9 |
| ; CHECK-i32-NEXT: fmov s2, w10 |
| ; CHECK-i32-NEXT: fmov s3, w12 |
| ; CHECK-i32-NEXT: fcvtzs w9, d17 |
| ; CHECK-i32-NEXT: fcvtzs w10, d18 |
| ; CHECK-i32-NEXT: mov v0.s[1], w11 |
| ; CHECK-i32-NEXT: fcvtzs w11, d19 |
| ; CHECK-i32-NEXT: mov z5.d, z5.d[3] |
| ; CHECK-i32-NEXT: mov z7.d, z7.d[3] |
| ; CHECK-i32-NEXT: mov v1.s[1], w13 |
| ; CHECK-i32-NEXT: mov v2.s[1], w14 |
| ; CHECK-i32-NEXT: mov v3.s[1], w8 |
| ; CHECK-i32-NEXT: mov z4.d, z4.d[3] |
| ; CHECK-i32-NEXT: mov z6.d, z6.d[3] |
| ; CHECK-i32-NEXT: mov v0.s[2], w15 |
| ; CHECK-i32-NEXT: fcvtzs w8, d5 |
| ; CHECK-i32-NEXT: mov v1.s[2], w9 |
| ; CHECK-i32-NEXT: fcvtzs w9, d7 |
| ; CHECK-i32-NEXT: mov v2.s[2], w10 |
| ; CHECK-i32-NEXT: fcvtzs w10, d4 |
| ; CHECK-i32-NEXT: mov v3.s[2], w11 |
| ; CHECK-i32-NEXT: fcvtzs w11, d6 |
| ; CHECK-i32-NEXT: mov v0.s[3], w8 |
| ; CHECK-i32-NEXT: mov v1.s[3], w9 |
| ; CHECK-i32-NEXT: mov v2.s[3], w10 |
| ; CHECK-i32-NEXT: mov v3.s[3], w11 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v16f64: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: ptrue p1.d, vl2 |
| ; CHECK-i64-NEXT: // kill: def $q6 killed $q6 def $z6 |
| ; CHECK-i64-NEXT: // kill: def $q4 killed $q4 def $z4 |
| ; CHECK-i64-NEXT: // kill: def $q7 killed $q7 def $z7 |
| ; CHECK-i64-NEXT: // kill: def $q5 killed $q5 def $z5 |
| ; CHECK-i64-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i64-NEXT: // kill: def $q3 killed $q3 def $z3 |
| ; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i64-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i64-NEXT: splice z6.d, p1, z6.d, z7.d |
| ; CHECK-i64-NEXT: splice z4.d, p1, z4.d, z5.d |
| ; CHECK-i64-NEXT: splice z2.d, p1, z2.d, z3.d |
| ; CHECK-i64-NEXT: splice z0.d, p1, z0.d, z1.d |
| ; CHECK-i64-NEXT: movprfx z3, z6 |
| ; CHECK-i64-NEXT: frintx z3.d, p0/m, z6.d |
| ; CHECK-i64-NEXT: movprfx z1, z4 |
| ; CHECK-i64-NEXT: frintx z1.d, p0/m, z4.d |
| ; CHECK-i64-NEXT: frintx z2.d, p0/m, z2.d |
| ; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d |
| ; CHECK-i64-NEXT: mov z4.d, z3.d[2] |
| ; CHECK-i64-NEXT: mov z5.d, z1.d[2] |
| ; CHECK-i64-NEXT: mov z6.d, z2.d[3] |
| ; CHECK-i64-NEXT: fcvtzs x11, d0 |
| ; CHECK-i64-NEXT: fcvtzs x12, d1 |
| ; CHECK-i64-NEXT: fcvtzs x13, d2 |
| ; CHECK-i64-NEXT: fcvtzs x14, d3 |
| ; CHECK-i64-NEXT: mov z7.d, z3.d[3] |
| ; CHECK-i64-NEXT: mov z16.d, z1.d[3] |
| ; CHECK-i64-NEXT: fcvtzs x9, d4 |
| ; CHECK-i64-NEXT: fcvtzs x10, d5 |
| ; CHECK-i64-NEXT: mov z4.d, z2.d[2] |
| ; CHECK-i64-NEXT: mov z5.d, z0.d[2] |
| ; CHECK-i64-NEXT: fcvtzs x8, d6 |
| ; CHECK-i64-NEXT: mov z2.d, z2.d[1] |
| ; CHECK-i64-NEXT: mov z6.d, z0.d[3] |
| ; CHECK-i64-NEXT: mov z1.d, z1.d[1] |
| ; CHECK-i64-NEXT: mov z3.d, z3.d[1] |
| ; CHECK-i64-NEXT: fcvtzs x15, d4 |
| ; CHECK-i64-NEXT: mov z4.d, z0.d[1] |
| ; CHECK-i64-NEXT: fmov d0, x11 |
| ; CHECK-i64-NEXT: fcvtzs x16, d5 |
| ; CHECK-i64-NEXT: fcvtzs x11, d2 |
| ; CHECK-i64-NEXT: fmov d2, x13 |
| ; CHECK-i64-NEXT: fcvtzs x17, d7 |
| ; CHECK-i64-NEXT: fcvtzs x18, d16 |
| ; CHECK-i64-NEXT: fcvtzs x0, d3 |
| ; CHECK-i64-NEXT: fcvtzs x13, d4 |
| ; CHECK-i64-NEXT: fmov d4, x12 |
| ; CHECK-i64-NEXT: fcvtzs x12, d6 |
| ; CHECK-i64-NEXT: fmov d6, x14 |
| ; CHECK-i64-NEXT: fcvtzs x14, d1 |
| ; CHECK-i64-NEXT: fmov d3, x15 |
| ; CHECK-i64-NEXT: fmov d1, x16 |
| ; CHECK-i64-NEXT: fmov d5, x10 |
| ; CHECK-i64-NEXT: fmov d7, x9 |
| ; CHECK-i64-NEXT: mov v2.d[1], x11 |
| ; CHECK-i64-NEXT: mov v0.d[1], x13 |
| ; CHECK-i64-NEXT: mov v3.d[1], x8 |
| ; CHECK-i64-NEXT: mov v6.d[1], x0 |
| ; CHECK-i64-NEXT: mov v4.d[1], x14 |
| ; CHECK-i64-NEXT: mov v1.d[1], x12 |
| ; CHECK-i64-NEXT: mov v5.d[1], x18 |
| ; CHECK-i64-NEXT: mov v7.d[1], x17 |
| ; CHECK-i64-NEXT: ret |
| %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x) |
| ret <16 x iXLen> %a |
| } |
| declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) |
| |
| ; lrint on a <32 x double> input, returning <32 x iXLen>. |
| ; In both runs the first eight q-register arguments are spliced pairwise into |
| ; SVE registers (ptrue vl2 + splice) and the remaining input is loaded with |
| ; ldp (from sp in the i32 run, from x29 in the i64 run). Every spliced vector |
| ; is rounded with a predicated frintx under ptrue vl4 and each lane is |
| ; converted with a scalar fcvtzs. |
| ; The i32 run packs the results into v0-v7. The i64 run sets up a frame with a |
| ; 32-byte aligned stack area, stores the converted lanes there, reloads them |
| ; with ld1d and writes the result with st1d through x8. |
| ; The assertions below are autogenerated; regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { |
| ; CHECK-i32-LABEL: lrint_v32f64: |
| ; CHECK-i32: // %bb.0: |
| ; CHECK-i32-NEXT: ptrue p1.d, vl2 |
| ; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 |
| ; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-i32-NEXT: // kill: def $q4 killed $q4 def $z4 |
| ; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5 |
| ; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7 |
| ; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6 |
| ; CHECK-i32-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d |
| ; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d |
| ; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d |
| ; CHECK-i32-NEXT: ldp q1, q3, [sp] |
| ; CHECK-i32-NEXT: splice z6.d, p1, z6.d, z7.d |
| ; CHECK-i32-NEXT: frintx z0.d, p0/m, z0.d |
| ; CHECK-i32-NEXT: splice z1.d, p1, z1.d, z3.d |
| ; CHECK-i32-NEXT: movprfx z18, z2 |
| ; CHECK-i32-NEXT: frintx z18.d, p0/m, z2.d |
| ; CHECK-i32-NEXT: ldp q5, q3, [sp, #96] |
| ; CHECK-i32-NEXT: ldp q2, q7, [sp, #64] |
| ; CHECK-i32-NEXT: splice z5.d, p1, z5.d, z3.d |
| ; CHECK-i32-NEXT: movprfx z3, z4 |
| ; CHECK-i32-NEXT: frintx z3.d, p0/m, z4.d |
| ; CHECK-i32-NEXT: mov z4.d, z0.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w8, d0 |
| ; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z7.d |
| ; CHECK-i32-NEXT: mov z19.d, z18.d[1] |
| ; CHECK-i32-NEXT: ldp q7, q16, [sp, #32] |
| ; CHECK-i32-NEXT: movprfx z17, z1 |
| ; CHECK-i32-NEXT: frintx z17.d, p0/m, z1.d |
| ; CHECK-i32-NEXT: fcvtzs w10, d4 |
| ; CHECK-i32-NEXT: mov z1.d, z0.d[2] |
| ; CHECK-i32-NEXT: fcvtzs w9, d18 |
| ; CHECK-i32-NEXT: mov z4.d, z0.d[3] |
| ; CHECK-i32-NEXT: fcvtzs w11, d19 |
| ; CHECK-i32-NEXT: mov z20.d, z18.d[3] |
| ; CHECK-i32-NEXT: fmov s0, w8 |
| ; CHECK-i32-NEXT: splice z7.d, p1, z7.d, z16.d |
| ; CHECK-i32-NEXT: movprfx z16, z6 |
| ; CHECK-i32-NEXT: frintx z16.d, p0/m, z6.d |
| ; CHECK-i32-NEXT: mov z6.d, z18.d[2] |
| ; CHECK-i32-NEXT: mov z18.d, z3.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w12, d3 |
| ; CHECK-i32-NEXT: fcvtzs w13, d1 |
| ; CHECK-i32-NEXT: fmov s1, w9 |
| ; CHECK-i32-NEXT: movprfx z19, z2 |
| ; CHECK-i32-NEXT: frintx z19.d, p0/m, z2.d |
| ; CHECK-i32-NEXT: mov v0.s[1], w10 |
| ; CHECK-i32-NEXT: mov z21.d, z3.d[2] |
| ; CHECK-i32-NEXT: fcvtzs w8, d4 |
| ; CHECK-i32-NEXT: fcvtzs w14, d6 |
| ; CHECK-i32-NEXT: mov z6.d, z16.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w15, d18 |
| ; CHECK-i32-NEXT: movprfx z18, z7 |
| ; CHECK-i32-NEXT: frintx z18.d, p0/m, z7.d |
| ; CHECK-i32-NEXT: mov v1.s[1], w11 |
| ; CHECK-i32-NEXT: fmov s2, w12 |
| ; CHECK-i32-NEXT: mov z7.d, z17.d[1] |
| ; CHECK-i32-NEXT: mov z4.d, z16.d[2] |
| ; CHECK-i32-NEXT: fcvtzs w16, d16 |
| ; CHECK-i32-NEXT: mov v0.s[2], w13 |
| ; CHECK-i32-NEXT: fcvtzs w13, d17 |
| ; CHECK-i32-NEXT: fcvtzs w12, d6 |
| ; CHECK-i32-NEXT: mov z6.d, z19.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w11, d21 |
| ; CHECK-i32-NEXT: movprfx z21, z5 |
| ; CHECK-i32-NEXT: frintx z21.d, p0/m, z5.d |
| ; CHECK-i32-NEXT: mov z3.d, z3.d[3] |
| ; CHECK-i32-NEXT: mov v2.s[1], w15 |
| ; CHECK-i32-NEXT: mov z5.d, z18.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w15, d7 |
| ; CHECK-i32-NEXT: fcvtzs w0, d19 |
| ; CHECK-i32-NEXT: mov v1.s[2], w14 |
| ; CHECK-i32-NEXT: fcvtzs w14, d4 |
| ; CHECK-i32-NEXT: mov z7.d, z18.d[2] |
| ; CHECK-i32-NEXT: fmov s4, w13 |
| ; CHECK-i32-NEXT: fcvtzs w13, d6 |
| ; CHECK-i32-NEXT: mov z6.d, z19.d[2] |
| ; CHECK-i32-NEXT: fcvtzs w10, d3 |
| ; CHECK-i32-NEXT: fmov s3, w16 |
| ; CHECK-i32-NEXT: fcvtzs w17, d18 |
| ; CHECK-i32-NEXT: fcvtzs w18, d5 |
| ; CHECK-i32-NEXT: mov z5.d, z21.d[1] |
| ; CHECK-i32-NEXT: fcvtzs w2, d21 |
| ; CHECK-i32-NEXT: fcvtzs w1, d7 |
| ; CHECK-i32-NEXT: mov z7.d, z21.d[2] |
| ; CHECK-i32-NEXT: mov v4.s[1], w15 |
| ; CHECK-i32-NEXT: fcvtzs w15, d6 |
| ; CHECK-i32-NEXT: fmov s6, w0 |
| ; CHECK-i32-NEXT: mov v3.s[1], w12 |
| ; CHECK-i32-NEXT: fcvtzs w9, d20 |
| ; CHECK-i32-NEXT: fcvtzs w12, d5 |
| ; CHECK-i32-NEXT: mov z20.d, z17.d[2] |
| ; CHECK-i32-NEXT: fmov s5, w17 |
| ; CHECK-i32-NEXT: mov z16.d, z16.d[3] |
| ; CHECK-i32-NEXT: mov z17.d, z17.d[3] |
| ; CHECK-i32-NEXT: mov z18.d, z18.d[3] |
| ; CHECK-i32-NEXT: mov v6.s[1], w13 |
| ; CHECK-i32-NEXT: fcvtzs w13, d7 |
| ; CHECK-i32-NEXT: fmov s7, w2 |
| ; CHECK-i32-NEXT: fcvtzs w16, d20 |
| ; CHECK-i32-NEXT: mov v5.s[1], w18 |
| ; CHECK-i32-NEXT: mov z19.d, z19.d[3] |
| ; CHECK-i32-NEXT: mov z20.d, z21.d[3] |
| ; CHECK-i32-NEXT: mov v2.s[2], w11 |
| ; CHECK-i32-NEXT: mov v3.s[2], w14 |
| ; CHECK-i32-NEXT: mov v7.s[1], w12 |
| ; CHECK-i32-NEXT: fcvtzs w11, d16 |
| ; CHECK-i32-NEXT: fcvtzs w12, d17 |
| ; CHECK-i32-NEXT: fcvtzs w14, d18 |
| ; CHECK-i32-NEXT: mov v6.s[2], w15 |
| ; CHECK-i32-NEXT: fcvtzs w15, d19 |
| ; CHECK-i32-NEXT: mov v4.s[2], w16 |
| ; CHECK-i32-NEXT: mov v5.s[2], w1 |
| ; CHECK-i32-NEXT: mov v0.s[3], w8 |
| ; CHECK-i32-NEXT: mov v1.s[3], w9 |
| ; CHECK-i32-NEXT: mov v2.s[3], w10 |
| ; CHECK-i32-NEXT: mov v7.s[2], w13 |
| ; CHECK-i32-NEXT: fcvtzs w13, d20 |
| ; CHECK-i32-NEXT: mov v3.s[3], w11 |
| ; CHECK-i32-NEXT: mov v6.s[3], w15 |
| ; CHECK-i32-NEXT: mov v4.s[3], w12 |
| ; CHECK-i32-NEXT: mov v5.s[3], w14 |
| ; CHECK-i32-NEXT: mov v7.s[3], w13 |
| ; CHECK-i32-NEXT: ret |
| ; |
| ; CHECK-i64-LABEL: lrint_v32f64: |
| ; CHECK-i64: // %bb.0: |
| ; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-i64-NEXT: sub x9, sp, #272 |
| ; CHECK-i64-NEXT: mov x29, sp |
| ; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 |
| ; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-i64-NEXT: .cfi_offset w30, -8 |
| ; CHECK-i64-NEXT: .cfi_offset w29, -16 |
| ; CHECK-i64-NEXT: ptrue p1.d, vl2 |
| ; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-i64-NEXT: // kill: def $q3 killed $q3 def $z3 |
| ; CHECK-i64-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-i64-NEXT: // kill: def $q7 killed $q7 def $z7 |
| ; CHECK-i64-NEXT: // kill: def $q6 killed $q6 def $z6 |
| ; CHECK-i64-NEXT: // kill: def $q4 killed $q4 def $z4 |
| ; CHECK-i64-NEXT: // kill: def $q5 killed $q5 def $z5 |
| ; CHECK-i64-NEXT: ptrue p0.d, vl4 |
| ; CHECK-i64-NEXT: splice z0.d, p1, z0.d, z1.d |
| ; CHECK-i64-NEXT: splice z2.d, p1, z2.d, z3.d |
| ; CHECK-i64-NEXT: splice z4.d, p1, z4.d, z5.d |
| ; CHECK-i64-NEXT: splice z6.d, p1, z6.d, z7.d |
| ; CHECK-i64-NEXT: ldp q5, q19, [x29, #16] |
| ; CHECK-i64-NEXT: movprfx z3, z0 |
| ; CHECK-i64-NEXT: frintx z3.d, p0/m, z0.d |
| ; CHECK-i64-NEXT: movprfx z16, z2 |
| ; CHECK-i64-NEXT: frintx z16.d, p0/m, z2.d |
| ; CHECK-i64-NEXT: frintx z4.d, p0/m, z4.d |
| ; CHECK-i64-NEXT: splice z5.d, p1, z5.d, z19.d |
| ; CHECK-i64-NEXT: frintx z6.d, p0/m, z6.d |
| ; CHECK-i64-NEXT: ldp q2, q17, [x29, #48] |
| ; CHECK-i64-NEXT: ldp q0, q1, [x29, #112] |
| ; CHECK-i64-NEXT: mov z18.d, z3.d[3] |
| ; CHECK-i64-NEXT: mov z7.d, z3.d[2] |
| ; CHECK-i64-NEXT: fcvtzs x9, d3 |
| ; CHECK-i64-NEXT: mov z3.d, z3.d[1] |
| ; CHECK-i64-NEXT: mov z20.d, z16.d[3] |
| ; CHECK-i64-NEXT: fcvtzs x12, d16 |
| ; CHECK-i64-NEXT: splice z2.d, p1, z2.d, z17.d |
| ; CHECK-i64-NEXT: frintx z5.d, p0/m, z5.d |
| ; CHECK-i64-NEXT: splice z0.d, p1, z0.d, z1.d |
| ; CHECK-i64-NEXT: fcvtzs x10, d18 |
| ; CHECK-i64-NEXT: fcvtzs x11, d7 |
| ; CHECK-i64-NEXT: mov z18.d, z16.d[2] |
| ; CHECK-i64-NEXT: mov z7.d, z16.d[1] |
| ; CHECK-i64-NEXT: fcvtzs x13, d3 |
| ; CHECK-i64-NEXT: fcvtzs x14, d20 |
| ; CHECK-i64-NEXT: str x9, [sp, #128] |
| ; CHECK-i64-NEXT: mov z16.d, z4.d[3] |
| ; CHECK-i64-NEXT: fcvtzs x9, d18 |
| ; CHECK-i64-NEXT: mov z18.d, z4.d[2] |
| ; CHECK-i64-NEXT: frintx z2.d, p0/m, z2.d |
| ; CHECK-i64-NEXT: stp x11, x10, [sp, #144] |
| ; CHECK-i64-NEXT: fcvtzs x10, d7 |
| ; CHECK-i64-NEXT: mov z7.d, z4.d[1] |
| ; CHECK-i64-NEXT: str x13, [sp, #136] |
| ; CHECK-i64-NEXT: fcvtzs x11, d16 |
| ; CHECK-i64-NEXT: mov z16.d, z6.d[3] |
| ; CHECK-i64-NEXT: fcvtzs x13, d18 |
| ; CHECK-i64-NEXT: ldp q3, q19, [x29, #80] |
| ; CHECK-i64-NEXT: stp x9, x14, [sp, #176] |
| ; CHECK-i64-NEXT: fcvtzs x9, d4 |
| ; CHECK-i64-NEXT: mov z4.d, z6.d[2] |
| ; CHECK-i64-NEXT: stp x12, x10, [sp, #160] |
| ; CHECK-i64-NEXT: fcvtzs x10, d7 |
| ; CHECK-i64-NEXT: mov z7.d, z6.d[1] |
| ; CHECK-i64-NEXT: fcvtzs x12, d6 |
| ; CHECK-i64-NEXT: splice z3.d, p1, z3.d, z19.d |
| ; CHECK-i64-NEXT: mov z6.d, z5.d[2] |
| ; CHECK-i64-NEXT: stp x13, x11, [sp, #208] |
| ; CHECK-i64-NEXT: fcvtzs x11, d16 |
| ; CHECK-i64-NEXT: fcvtzs x13, d4 |
| ; CHECK-i64-NEXT: mov z4.d, z5.d[3] |
| ; CHECK-i64-NEXT: mov z1.d, z5.d[1] |
| ; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d |
| ; CHECK-i64-NEXT: stp x9, x10, [sp, #192] |
| ; CHECK-i64-NEXT: fcvtzs x9, d7 |
| ; CHECK-i64-NEXT: frintx z3.d, p0/m, z3.d |
| ; CHECK-i64-NEXT: fcvtzs x10, d4 |
| ; CHECK-i64-NEXT: stp x13, x11, [sp, #240] |
| ; CHECK-i64-NEXT: fcvtzs x11, d6 |
| ; CHECK-i64-NEXT: mov z4.d, z2.d[3] |
| ; CHECK-i64-NEXT: fcvtzs x13, d2 |
| ; CHECK-i64-NEXT: stp x12, x9, [sp, #224] |
| ; CHECK-i64-NEXT: fcvtzs x9, d5 |
| ; CHECK-i64-NEXT: fcvtzs x12, d1 |
| ; CHECK-i64-NEXT: mov z5.d, z2.d[2] |
| ; CHECK-i64-NEXT: mov z1.d, z2.d[1] |
| ; CHECK-i64-NEXT: mov z2.d, z3.d[2] |
| ; CHECK-i64-NEXT: stp x11, x10, [sp, #16] |
| ; CHECK-i64-NEXT: fcvtzs x10, d4 |
| ; CHECK-i64-NEXT: mov z4.d, z3.d[3] |
| ; CHECK-i64-NEXT: fcvtzs x11, d5 |
| ; CHECK-i64-NEXT: stp x9, x12, [sp] |
| ; CHECK-i64-NEXT: fcvtzs x9, d1 |
| ; CHECK-i64-NEXT: mov z1.d, z3.d[1] |
| ; CHECK-i64-NEXT: fcvtzs x12, d4 |
| ; CHECK-i64-NEXT: stp x11, x10, [sp, #48] |
| ; CHECK-i64-NEXT: fcvtzs x10, d2 |
| ; CHECK-i64-NEXT: fcvtzs x11, d3 |
| ; CHECK-i64-NEXT: stp x13, x9, [sp, #32] |
| ; CHECK-i64-NEXT: fcvtzs x9, d1 |
| ; CHECK-i64-NEXT: mov z2.d, z0.d[3] |
| ; CHECK-i64-NEXT: mov z3.d, z0.d[2] |
| ; CHECK-i64-NEXT: mov z1.d, z0.d[1] |
| ; CHECK-i64-NEXT: fcvtzs x13, d2 |
| ; CHECK-i64-NEXT: stp x10, x12, [sp, #80] |
| ; CHECK-i64-NEXT: fcvtzs x12, d0 |
| ; CHECK-i64-NEXT: fcvtzs x10, d3 |
| ; CHECK-i64-NEXT: stp x11, x9, [sp, #64] |
| ; CHECK-i64-NEXT: fcvtzs x9, d1 |
| ; CHECK-i64-NEXT: stp x10, x13, [sp, #112] |
| ; CHECK-i64-NEXT: add x10, sp, #192 |
| ; CHECK-i64-NEXT: stp x12, x9, [sp, #96] |
| ; CHECK-i64-NEXT: add x9, sp, #128 |
| ; CHECK-i64-NEXT: ld1d { z0.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #160 |
| ; CHECK-i64-NEXT: ld1d { z2.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: ld1d { z1.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #96 |
| ; CHECK-i64-NEXT: add x10, sp, #224 |
| ; CHECK-i64-NEXT: ld1d { z3.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: add x9, sp, #64 |
| ; CHECK-i64-NEXT: ld1d { z4.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: add x10, sp, #32 |
| ; CHECK-i64-NEXT: ld1d { z5.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: mov x9, sp |
| ; CHECK-i64-NEXT: ld1d { z6.d }, p0/z, [x10] |
| ; CHECK-i64-NEXT: mov x10, #28 // =0x1c |
| ; CHECK-i64-NEXT: ld1d { z7.d }, p0/z, [x9] |
| ; CHECK-i64-NEXT: mov x9, #24 // =0x18 |
| ; CHECK-i64-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3] |
| ; CHECK-i64-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #20 // =0x14 |
| ; CHECK-i64-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #16 // =0x10 |
| ; CHECK-i64-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #12 // =0xc |
| ; CHECK-i64-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #8 // =0x8 |
| ; CHECK-i64-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: mov x9, #4 // =0x4 |
| ; CHECK-i64-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] |
| ; CHECK-i64-NEXT: st1d { z0.d }, p0, [x8] |
| ; CHECK-i64-NEXT: mov sp, x29 |
| ; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-i64-NEXT: ret |
| ; The intrinsic suffix names the operand type, so it is v32f64 here to agree |
| ; with the <32 x double> argument and with the declaration below. |
| %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double> %x) |
| ret <32 x iXLen> %a |
| } |
| declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) |