| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,SIGNED-ZEROS |
| ; RUN: llc -mtriple=aarch64 --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NO-SIGNED-ZEROS |
| |
| ; Test folding of float->int->float roundtrips into float-only operations. |
| ; The optimization converts patterns like: |
| ; sitofp(fptosi(x)) -> ftrunc(x) |
| ; sitofp(smin(fptosi(x), C)) -> fminnum(ftrunc(x), (float)C) |
| ; This is relevant for AArch64 as it avoids GPR bouncing and keeps computation in SIMD/FP registers. |
| |
| ; Scalar f32 signed round trip (fptosi then sitofp) with no clamping. |
| ; SIGNED-ZEROS keeps the fcvtzs/scvtf pair; NO-SIGNED-ZEROS folds it to frintz. |
| ; The integer round trip yields +0.0 for inputs in (-1.0, -0.0], where frintz |
| ; would yield -0.0, so the fold is only expected when signed zeros are ignored. |
| define float @test_signed_basic(float %x) { |
| ; SIGNED-ZEROS-LABEL: test_signed_basic: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzs s0, s0 |
| ; SIGNED-ZEROS-NEXT: scvtf s0, s0 |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_signed_basic: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptosi float %x to i32 |
| %f = sitofp i32 %i to float |
| ret float %f |
| } |
| |
| ; Scalar f32 unsigned round trip (fptoui then uitofp) with no clamping. |
| ; SIGNED-ZEROS keeps the fcvtzu/ucvtf pair; NO-SIGNED-ZEROS folds it to frintz. |
| define float @test_unsigned_basic(float %x) { |
| ; SIGNED-ZEROS-LABEL: test_unsigned_basic: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzu s0, s0 |
| ; SIGNED-ZEROS-NEXT: ucvtf s0, s0 |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptoui float %x to i32 |
| %f = uitofp i32 %i to float |
| ret float %f |
| } |
| |
| ; Scalar f32 signed round trip with the integer clamped to [-512, 1023] |
| ; via smax/smin before converting back. |
| ; SIGNED-ZEROS: the clamp stays in integer registers (cmn/cmp + csel) between |
| ; fcvtzs and scvtf. |
| ; NO-SIGNED-ZEROS: folded to frintz + fmaxnm + fminnm. The bounds are |
| ; materialized as floats: 0xC4000000 is -512.0 and 0x447FC000 is 1023.0. |
| define float @test_signed_min_max(float %x) { |
| ; SIGNED-ZEROS-LABEL: test_signed_min_max: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzs w9, s0 |
| ; SIGNED-ZEROS-NEXT: mov w8, #-512 // =0xfffffe00 |
| ; SIGNED-ZEROS-NEXT: cmn w9, #512 |
| ; SIGNED-ZEROS-NEXT: csel w8, w9, w8, gt |
| ; SIGNED-ZEROS-NEXT: mov w9, #1023 // =0x3ff |
| ; SIGNED-ZEROS-NEXT: cmp w8, #1023 |
| ; SIGNED-ZEROS-NEXT: csel w8, w8, w9, lt |
| ; SIGNED-ZEROS-NEXT: scvtf s0, w8 |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_signed_min_max: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #196, lsl #24 |
| ; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 |
| ; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000 |
| ; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16 |
| ; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1 |
| ; NO-SIGNED-ZEROS-NEXT: fmov s1, w8 |
| ; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1 |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptosi float %x to i32 |
| %lower = call i32 @llvm.smax.i32(i32 %i, i32 -512) |
| %clamped = call i32 @llvm.smin.i32(i32 %lower, i32 1023) |
| %f = sitofp i32 %clamped to float |
| ret float %f |
| } |
| |
| ; Scalar f32 unsigned round trip with the integer clamped to [512, 1023] |
| ; via umax/umin before converting back. |
| ; SIGNED-ZEROS: the clamp stays in integer registers (cmp + csel with the |
| ; unsigned hi/lo conditions) between fcvtzu and ucvtf. |
| ; NO-SIGNED-ZEROS: folded to frintz + fmaxnm + fminnm. The bounds are |
| ; materialized as floats: 0x44000000 is 512.0 and 0x447FC000 is 1023.0. |
| define float @test_unsigned_min_max(float %x) { |
| ; SIGNED-ZEROS-LABEL: test_unsigned_min_max: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzu w9, s0 |
| ; SIGNED-ZEROS-NEXT: mov w8, #512 // =0x200 |
| ; SIGNED-ZEROS-NEXT: cmp w9, #512 |
| ; SIGNED-ZEROS-NEXT: csel w8, w9, w8, hi |
| ; SIGNED-ZEROS-NEXT: mov w9, #1023 // =0x3ff |
| ; SIGNED-ZEROS-NEXT: cmp w8, #1023 |
| ; SIGNED-ZEROS-NEXT: csel w8, w8, w9, lo |
| ; SIGNED-ZEROS-NEXT: ucvtf s0, w8 |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_unsigned_min_max: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #68, lsl #24 |
| ; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 |
| ; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000 |
| ; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16 |
| ; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1 |
| ; NO-SIGNED-ZEROS-NEXT: fmov s1, w8 |
| ; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1 |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptoui float %x to i32 |
| %lower = call i32 @llvm.umax.i32(i32 %i, i32 512) |
| %clamped = call i32 @llvm.umin.i32(i32 %lower, i32 1023) |
| %f = uitofp i32 %clamped to float |
| ret float %f |
| } |
| |
| ; Negative test: the smin bound 16777217 (2^24 + 1) is NOT exactly |
| ; representable in f32. Both RUN lines share the CHECK prefix here, so neither |
| ; configuration is expected to fold: the clamp stays in integer registers |
| ; between fcvtzs and scvtf. |
| define float @test_inexact_16777217(float %x) { |
| ; CHECK-LABEL: test_inexact_16777217: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fcvtzs w8, s0 |
| ; CHECK-NEXT: mov w9, #16777216 // =0x1000000 |
| ; CHECK-NEXT: cmp w8, w9 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: movk w9, #256, lsl #16 |
| ; CHECK-NEXT: csel w8, w8, w9, le |
| ; CHECK-NEXT: scvtf s0, w8 |
| ; CHECK-NEXT: ret |
| entry: |
| %i = fptosi float %x to i32 |
| %clamped = call i32 @llvm.smin.i32(i32 %i, i32 16777217) |
| %f = sitofp i32 %clamped to float |
| ret float %f |
| } |
| |
| ; Vector <4 x float> signed round trip (fptosi then sitofp) with no clamping. |
| ; SIGNED-ZEROS keeps the vector fcvtzs/scvtf pair; NO-SIGNED-ZEROS folds it |
| ; to a single vector frintz. |
| define <4 x float> @test_signed_v4f32(<4 x float> %x) { |
| ; SIGNED-ZEROS-LABEL: test_signed_v4f32: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzs v0.4s, v0.4s |
| ; SIGNED-ZEROS-NEXT: scvtf v0.4s, v0.4s |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_signed_v4f32: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptosi <4 x float> %x to <4 x i32> |
| %f = sitofp <4 x i32> %i to <4 x float> |
| ret <4 x float> %f |
| } |
| |
| ; Vector <4 x float> unsigned round trip (fptoui then uitofp) with no clamping. |
| ; SIGNED-ZEROS keeps the vector fcvtzu/ucvtf pair; NO-SIGNED-ZEROS folds it |
| ; to a single vector frintz. |
| define <4 x float> @test_unsigned_v4f32(<4 x float> %x) { |
| ; SIGNED-ZEROS-LABEL: test_unsigned_v4f32: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzu v0.4s, v0.4s |
| ; SIGNED-ZEROS-NEXT: ucvtf v0.4s, v0.4s |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_unsigned_v4f32: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptoui <4 x float> %x to <4 x i32> |
| %f = uitofp <4 x i32> %i to <4 x float> |
| ret <4 x float> %f |
| } |
| |
| ; Vector <2 x double> signed round trip through <2 x i64> with no clamping. |
| ; SIGNED-ZEROS keeps the vector fcvtzs/scvtf pair; NO-SIGNED-ZEROS folds it |
| ; to a single vector frintz. |
| define <2 x double> @test_signed_v2f64(<2 x double> %x) { |
| ; SIGNED-ZEROS-LABEL: test_signed_v2f64: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzs v0.2d, v0.2d |
| ; SIGNED-ZEROS-NEXT: scvtf v0.2d, v0.2d |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_signed_v2f64: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: frintz v0.2d, v0.2d |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptosi <2 x double> %x to <2 x i64> |
| %f = sitofp <2 x i64> %i to <2 x double> |
| ret <2 x double> %f |
| } |
| |
| ; Vector <2 x double> unsigned round trip through <2 x i64> with no clamping. |
| ; SIGNED-ZEROS keeps the vector fcvtzu/ucvtf pair; NO-SIGNED-ZEROS folds it |
| ; to a single vector frintz. |
| define <2 x double> @test_unsigned_v2f64(<2 x double> %x) { |
| ; SIGNED-ZEROS-LABEL: test_unsigned_v2f64: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzu v0.2d, v0.2d |
| ; SIGNED-ZEROS-NEXT: ucvtf v0.2d, v0.2d |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_unsigned_v2f64: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: frintz v0.2d, v0.2d |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptoui <2 x double> %x to <2 x i64> |
| %f = uitofp <2 x i64> %i to <2 x double> |
| ret <2 x double> %f |
| } |
| |
| ; Vector <4 x float> signed round trip with each lane clamped to [-512, 1023] |
| ; via splat smax/smin before converting back. |
| ; SIGNED-ZEROS: integer vector smax/smin between fcvtzs and scvtf. |
| ; NO-SIGNED-ZEROS: folded to frintz + fmaxnm + fminnm with the bounds as float |
| ; splats (0xC4000000 is -512.0, 0x447FC000 is 1023.0). |
| define <4 x float> @test_signed_v4f32_min_max(<4 x float> %x) { |
| ; SIGNED-ZEROS-LABEL: test_signed_v4f32_min_max: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: fcvtzs v0.4s, v0.4s |
| ; SIGNED-ZEROS-NEXT: mvni v1.4s, #1, msl #8 |
| ; SIGNED-ZEROS-NEXT: movi v2.4s, #3, msl #8 |
| ; SIGNED-ZEROS-NEXT: smax v0.4s, v0.4s, v1.4s |
| ; SIGNED-ZEROS-NEXT: smin v0.4s, v0.4s, v2.4s |
| ; SIGNED-ZEROS-NEXT: scvtf v0.4s, v0.4s |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_signed_v4f32_min_max: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: movi v1.4s, #196, lsl #24 |
| ; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s |
| ; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000 |
| ; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16 |
| ; NO-SIGNED-ZEROS-NEXT: fmaxnm v0.4s, v0.4s, v1.4s |
| ; NO-SIGNED-ZEROS-NEXT: dup v1.4s, w8 |
| ; NO-SIGNED-ZEROS-NEXT: fminnm v0.4s, v0.4s, v1.4s |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptosi <4 x float> %x to <4 x i32> |
| %lower = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 -512)) |
| %clamped = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %lower, <4 x i32> splat (i32 1023)) |
| %f = sitofp <4 x i32> %clamped to <4 x float> |
| ret <4 x float> %f |
| } |
| |
| ; Vector <4 x float> unsigned round trip with each lane clamped to [512, 1023] |
| ; via splat umax/umin before converting back. |
| ; SIGNED-ZEROS: integer vector umax/umin between fcvtzu and ucvtf. |
| ; NO-SIGNED-ZEROS: folded to frintz + fmaxnm + fminnm with the bounds as float |
| ; splats (0x44000000 is 512.0, 0x447FC000 is 1023.0). |
| define <4 x float> @test_unsigned_v4f32_min_max(<4 x float> %x) { |
| ; SIGNED-ZEROS-LABEL: test_unsigned_v4f32_min_max: |
| ; SIGNED-ZEROS: // %bb.0: // %entry |
| ; SIGNED-ZEROS-NEXT: movi v1.4s, #2, lsl #8 |
| ; SIGNED-ZEROS-NEXT: fcvtzu v0.4s, v0.4s |
| ; SIGNED-ZEROS-NEXT: movi v2.4s, #3, msl #8 |
| ; SIGNED-ZEROS-NEXT: umax v0.4s, v0.4s, v1.4s |
| ; SIGNED-ZEROS-NEXT: umin v0.4s, v0.4s, v2.4s |
| ; SIGNED-ZEROS-NEXT: ucvtf v0.4s, v0.4s |
| ; SIGNED-ZEROS-NEXT: ret |
| ; |
| ; NO-SIGNED-ZEROS-LABEL: test_unsigned_v4f32_min_max: |
| ; NO-SIGNED-ZEROS: // %bb.0: // %entry |
| ; NO-SIGNED-ZEROS-NEXT: movi v1.4s, #68, lsl #24 |
| ; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s |
| ; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000 |
| ; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16 |
| ; NO-SIGNED-ZEROS-NEXT: fmaxnm v0.4s, v0.4s, v1.4s |
| ; NO-SIGNED-ZEROS-NEXT: dup v1.4s, w8 |
| ; NO-SIGNED-ZEROS-NEXT: fminnm v0.4s, v0.4s, v1.4s |
| ; NO-SIGNED-ZEROS-NEXT: ret |
| entry: |
| %i = fptoui <4 x float> %x to <4 x i32> |
| %lower = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 512)) |
| %clamped = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %lower, <4 x i32> splat (i32 1023)) |
| %f = uitofp <4 x i32> %clamped to <4 x float> |
| ret <4 x float> %f |
| } |
| |
| |
| ; The round-tripped value is only used by an ordered-equal compare with 0.0. |
| ; Both RUN lines share the CHECK prefix, so frintz is expected even without |
| ; the no-signed-zeros option (presumably because -0.0 and +0.0 compare equal, |
| ; so the sign of a zero result cannot change the outcome). |
| define i1 @test_fcmp(float %x) { |
| ; CHECK-LABEL: test_fcmp: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: frintz s0, s0 |
| ; CHECK-NEXT: fcmp s0, #0.0 |
| ; CHECK-NEXT: cset w0, eq |
| ; CHECK-NEXT: ret |
| %conv1 = fptosi float %x to i32 |
| %conv2 = sitofp i32 %conv1 to float |
| %cmp = fcmp oeq float %conv2, 0.0 |
| ret i1 %cmp |
| } |
| |
| ; The round-tripped value is only used by llvm.fabs. |
| ; Both RUN lines share the CHECK prefix, so frintz is expected even without |
| ; the no-signed-zeros option (presumably because fabs discards the sign bit). |
| define float @test_fabs(float %x) { |
| ; CHECK-LABEL: test_fabs: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: frintz s0, s0 |
| ; CHECK-NEXT: fabs s0, s0 |
| ; CHECK-NEXT: ret |
| %conv1 = fptosi float %x to i32 |
| %conv2 = sitofp i32 %conv1 to float |
| %abs = call float @llvm.fabs.f32(float %conv2) |
| ret float %abs |
| } |
| |
| ; The round-tripped value is only used as the magnitude operand of |
| ; llvm.copysign; the sign comes from %y. |
| ; Both RUN lines share the CHECK prefix, so frintz is expected even without |
| ; the no-signed-zeros option (presumably because the magnitude operand's own |
| ; sign bit is overwritten). |
| define float @test_copysign(float %x, float %y) { |
| ; CHECK-LABEL: test_copysign: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: frintz s0, s0 |
| ; CHECK-NEXT: mvni v2.4s, #128, lsl #24 |
| ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 |
| ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b |
| ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 |
| ; CHECK-NEXT: ret |
| %conv1 = fptosi float %x to i32 |
| %conv2 = sitofp i32 %conv1 to float |
| %combine = call float @llvm.copysign.f32(float %conv2, float %y) |
| ret float %combine |
| } |
| |
| ; The round-tripped value is only used by an fadd with the constant 1.0. |
| ; Both RUN lines share the CHECK prefix, so frintz is expected even without |
| ; the no-signed-zeros option (presumably because adding a non-zero constant |
| ; gives the same result for -0.0 and +0.0). |
| define float @test_fadd(float %x) { |
| ; CHECK-LABEL: test_fadd: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: frintz s0, s0 |
| ; CHECK-NEXT: fmov s1, #1.00000000 |
| ; CHECK-NEXT: fadd s0, s0, s1 |
| ; CHECK-NEXT: ret |
| %conv1 = fptosi float %x to i32 |
| %conv2 = sitofp i32 %conv1 to float |
| %add = fadd float %conv2, 1.0 |
| ret float %add |
| } |
| |
| ; The round-tripped value is only used by an fsub of the constant 1.0, which |
| ; the expected output emits as an fadd with -1.0. |
| ; Both RUN lines share the CHECK prefix, so frintz is expected even without |
| ; the no-signed-zeros option (presumably because subtracting a non-zero |
| ; constant gives the same result for -0.0 and +0.0). |
| define float @test_fsub(float %x) { |
| ; CHECK-LABEL: test_fsub: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: frintz s0, s0 |
| ; CHECK-NEXT: fmov s1, #-1.00000000 |
| ; CHECK-NEXT: fadd s0, s0, s1 |
| ; CHECK-NEXT: ret |
| %conv1 = fptosi float %x to i32 |
| %conv2 = sitofp i32 %conv1 to float |
| %sub = fsub float %conv2, 1.0 |
| ret float %sub |
| } |
| |
| ; Declarations of the integer min/max, fabs and copysign intrinsics called by |
| ; the test functions in this file. |
| declare i32 @llvm.smin.i32(i32, i32) |
| declare i32 @llvm.smax.i32(i32, i32) |
| declare i32 @llvm.umin.i32(i32, i32) |
| declare i32 @llvm.umax.i32(i32, i32) |
| declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) |
| declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) |
| declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) |
| declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) |
| declare float @llvm.fabs.f32(float) |
| declare float @llvm.copysign.f32(float, float) |