blob: a50716e4ab183b8bfaac1348791cb2acb5dc1952 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,SIGNED-ZEROS
; RUN: llc -mtriple=aarch64 --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NO-SIGNED-ZEROS
; Test folding of float->int->float roundtrips into float-only operations.
; The optimization converts patterns such as:
; sitofp(fptosi(x)) -> ftrunc(x)
; sitofp(smin(fptosi(x), C)) -> fminnum(ftrunc(x), (float)C)
; This is relevant for AArch64 as it avoids GPR bouncing and keeps computation in SIMD/FP registers.
; Scalar f32 signed round-trip, sitofp(fptosi(x)), with no clamping in between.
; Default llc invocation keeps the conversion pair (fcvtzs followed by scvtf).
; With --enable-no-signed-zeros-fp-math the pair collapses to a single frintz.
; The integer round-trip yields +0.0 for inputs in (-1.0, -0.0] whereas a
; truncating round yields -0.0; presumably that is why the fold only fires
; when signed zeros may be ignored.
define float @test_signed_basic(float %x) {
; SIGNED-ZEROS-LABEL: test_signed_basic:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzs s0, s0
; SIGNED-ZEROS-NEXT: scvtf s0, s0
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptosi float %x to i32
%f = sitofp i32 %i to float
ret float %f
}
; Scalar f32 unsigned round-trip, uitofp(fptoui(x)), with no clamping in between.
; Default llc invocation keeps the conversion pair (fcvtzu followed by ucvtf).
; With --enable-no-signed-zeros-fp-math the pair collapses to a single frintz,
; the same instruction the signed variant produces.
define float @test_unsigned_basic(float %x) {
; SIGNED-ZEROS-LABEL: test_unsigned_basic:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzu s0, s0
; SIGNED-ZEROS-NEXT: ucvtf s0, s0
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptoui float %x to i32
%f = uitofp i32 %i to float
ret float %f
}
; Scalar f32 signed round-trip with an integer clamp to [-512, 1023]
; (smax then smin) between fptosi and sitofp.
; Default llc invocation does the clamp in general-purpose registers:
; fcvtzs into w9, two compare + csel pairs, then scvtf back to s0.
; With --enable-no-signed-zeros-fp-math the value stays in FP registers:
; frintz, then fmaxnm against 0xC4000000 (-512.0f, built by movi #196 lsl #24)
; and fminnm against 0x447FC000 (1023.0f, built by mov/movk + fmov).
define float @test_signed_min_max(float %x) {
; SIGNED-ZEROS-LABEL: test_signed_min_max:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzs w9, s0
; SIGNED-ZEROS-NEXT: mov w8, #-512 // =0xfffffe00
; SIGNED-ZEROS-NEXT: cmn w9, #512
; SIGNED-ZEROS-NEXT: csel w8, w9, w8, gt
; SIGNED-ZEROS-NEXT: mov w9, #1023 // =0x3ff
; SIGNED-ZEROS-NEXT: cmp w8, #1023
; SIGNED-ZEROS-NEXT: csel w8, w8, w9, lt
; SIGNED-ZEROS-NEXT: scvtf s0, w8
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_min_max:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #196, lsl #24
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptosi float %x to i32
%lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
%clamped = call i32 @llvm.smin.i32(i32 %lower, i32 1023)
%f = sitofp i32 %clamped to float
ret float %f
}
; Scalar f32 unsigned round-trip with an integer clamp to [512, 1023]
; (umax then umin) between fptoui and uitofp.
; Default llc invocation does the clamp in general-purpose registers:
; fcvtzu into w9, two compare + csel pairs using the unsigned conditions
; hi/lo, then ucvtf back to s0.
; With --enable-no-signed-zeros-fp-math the value stays in FP registers:
; frintz, then fmaxnm against 0x44000000 (512.0f, built by movi #68 lsl #24)
; and fminnm against 0x447FC000 (1023.0f, built by mov/movk + fmov).
define float @test_unsigned_min_max(float %x) {
; SIGNED-ZEROS-LABEL: test_unsigned_min_max:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzu w9, s0
; SIGNED-ZEROS-NEXT: mov w8, #512 // =0x200
; SIGNED-ZEROS-NEXT: cmp w9, #512
; SIGNED-ZEROS-NEXT: csel w8, w9, w8, hi
; SIGNED-ZEROS-NEXT: mov w9, #1023 // =0x3ff
; SIGNED-ZEROS-NEXT: cmp w8, #1023
; SIGNED-ZEROS-NEXT: csel w8, w8, w9, lo
; SIGNED-ZEROS-NEXT: ucvtf s0, w8
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_min_max:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #68, lsl #24
; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptoui float %x to i32
%lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
%clamped = call i32 @llvm.umin.i32(i32 %lower, i32 1023)
%f = uitofp i32 %clamped to float
ret float %f
}
; Negative test: 16777217 (2^24 + 1) is NOT exactly representable in f32,
; so the smin bound has no exact float equivalent.
; Both llc invocations share one set of assertions here: the clamp stays in
; integer registers (fcvtzs, cmp + csel, scvtf) and no frintz/fminnm appears,
; even with --enable-no-signed-zeros-fp-math.
; The bound is materialised as 0x01000001 via mov #1 + movk #256, lsl #16.
define float @test_inexact_16777217(float %x) {
; CHECK-LABEL: test_inexact_16777217:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs w8, s0
; CHECK-NEXT: mov w9, #16777216 // =0x1000000
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: movk w9, #256, lsl #16
; CHECK-NEXT: csel w8, w8, w9, le
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
entry:
%i = fptosi float %x to i32
%clamped = call i32 @llvm.smin.i32(i32 %i, i32 16777217)
%f = sitofp i32 %clamped to float
ret float %f
}
; Vector <4 x float> signed round-trip through <4 x i32>, no clamping.
; Default llc invocation keeps the vector conversion pair
; (fcvtzs v0.4s followed by scvtf v0.4s).
; With --enable-no-signed-zeros-fp-math the pair collapses to frintz v0.4s.
define <4 x float> @test_signed_v4f32(<4 x float> %x) {
; SIGNED-ZEROS-LABEL: test_signed_v4f32:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzs v0.4s, v0.4s
; SIGNED-ZEROS-NEXT: scvtf v0.4s, v0.4s
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_v4f32:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptosi <4 x float> %x to <4 x i32>
%f = sitofp <4 x i32> %i to <4 x float>
ret <4 x float> %f
}
; Vector <4 x float> unsigned round-trip through <4 x i32>, no clamping.
; Default llc invocation keeps the vector conversion pair
; (fcvtzu v0.4s followed by ucvtf v0.4s).
; With --enable-no-signed-zeros-fp-math the pair collapses to frintz v0.4s.
define <4 x float> @test_unsigned_v4f32(<4 x float> %x) {
; SIGNED-ZEROS-LABEL: test_unsigned_v4f32:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzu v0.4s, v0.4s
; SIGNED-ZEROS-NEXT: ucvtf v0.4s, v0.4s
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_v4f32:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptoui <4 x float> %x to <4 x i32>
%f = uitofp <4 x i32> %i to <4 x float>
ret <4 x float> %f
}
; Vector <2 x double> signed round-trip through <2 x i64>, no clamping.
; Default llc invocation keeps the vector conversion pair
; (fcvtzs v0.2d followed by scvtf v0.2d).
; With --enable-no-signed-zeros-fp-math the pair collapses to frintz v0.2d.
define <2 x double> @test_signed_v2f64(<2 x double> %x) {
; SIGNED-ZEROS-LABEL: test_signed_v2f64:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzs v0.2d, v0.2d
; SIGNED-ZEROS-NEXT: scvtf v0.2d, v0.2d
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_v2f64:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: frintz v0.2d, v0.2d
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptosi <2 x double> %x to <2 x i64>
%f = sitofp <2 x i64> %i to <2 x double>
ret <2 x double> %f
}
; Vector <2 x double> unsigned round-trip through <2 x i64>, no clamping.
; Default llc invocation keeps the vector conversion pair
; (fcvtzu v0.2d followed by ucvtf v0.2d).
; With --enable-no-signed-zeros-fp-math the pair collapses to frintz v0.2d.
define <2 x double> @test_unsigned_v2f64(<2 x double> %x) {
; SIGNED-ZEROS-LABEL: test_unsigned_v2f64:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzu v0.2d, v0.2d
; SIGNED-ZEROS-NEXT: ucvtf v0.2d, v0.2d
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_v2f64:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: frintz v0.2d, v0.2d
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptoui <2 x double> %x to <2 x i64>
%f = uitofp <2 x i64> %i to <2 x double>
ret <2 x double> %f
}
; Vector <4 x float> signed round-trip with a splat integer clamp to
; [-512, 1023] (smax then smin) between fptosi and sitofp.
; Default llc invocation clamps in the integer domain: fcvtzs, then vector
; smax/smin against splat constants built by mvni #1, msl #8 (0xFFFFFE00,
; i.e. -512) and movi #3, msl #8 (0x3FF, i.e. 1023), then scvtf.
; With --enable-no-signed-zeros-fp-math the clamp is done on floats: frintz,
; then fmaxnm against a splat of 0xC4000000 (-512.0f) and fminnm against a
; splat of 0x447FC000 (1023.0f, built in w8 and broadcast with dup).
define <4 x float> @test_signed_v4f32_min_max(<4 x float> %x) {
; SIGNED-ZEROS-LABEL: test_signed_v4f32_min_max:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: fcvtzs v0.4s, v0.4s
; SIGNED-ZEROS-NEXT: mvni v1.4s, #1, msl #8
; SIGNED-ZEROS-NEXT: movi v2.4s, #3, msl #8
; SIGNED-ZEROS-NEXT: smax v0.4s, v0.4s, v1.4s
; SIGNED-ZEROS-NEXT: smin v0.4s, v0.4s, v2.4s
; SIGNED-ZEROS-NEXT: scvtf v0.4s, v0.4s
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_v4f32_min_max:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: movi v1.4s, #196, lsl #24
; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s
; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
; NO-SIGNED-ZEROS-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
; NO-SIGNED-ZEROS-NEXT: dup v1.4s, w8
; NO-SIGNED-ZEROS-NEXT: fminnm v0.4s, v0.4s, v1.4s
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptosi <4 x float> %x to <4 x i32>
%lower = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 -512))
%clamped = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %lower, <4 x i32> splat (i32 1023))
%f = sitofp <4 x i32> %clamped to <4 x float>
ret <4 x float> %f
}
; Vector <4 x float> unsigned round-trip with a splat integer clamp to
; [512, 1023] (umax then umin) between fptoui and uitofp.
; Default llc invocation clamps in the integer domain: fcvtzu, then vector
; umax/umin against splat constants built by movi #2, lsl #8 (0x200, i.e. 512)
; and movi #3, msl #8 (0x3FF, i.e. 1023), then ucvtf.
; With --enable-no-signed-zeros-fp-math the clamp is done on floats: frintz,
; then fmaxnm against a splat of 0x44000000 (512.0f) and fminnm against a
; splat of 0x447FC000 (1023.0f, built in w8 and broadcast with dup).
define <4 x float> @test_unsigned_v4f32_min_max(<4 x float> %x) {
; SIGNED-ZEROS-LABEL: test_unsigned_v4f32_min_max:
; SIGNED-ZEROS: // %bb.0: // %entry
; SIGNED-ZEROS-NEXT: movi v1.4s, #2, lsl #8
; SIGNED-ZEROS-NEXT: fcvtzu v0.4s, v0.4s
; SIGNED-ZEROS-NEXT: movi v2.4s, #3, msl #8
; SIGNED-ZEROS-NEXT: umax v0.4s, v0.4s, v1.4s
; SIGNED-ZEROS-NEXT: umin v0.4s, v0.4s, v2.4s
; SIGNED-ZEROS-NEXT: ucvtf v0.4s, v0.4s
; SIGNED-ZEROS-NEXT: ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_v4f32_min_max:
; NO-SIGNED-ZEROS: // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT: movi v1.4s, #68, lsl #24
; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s
; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
; NO-SIGNED-ZEROS-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
; NO-SIGNED-ZEROS-NEXT: dup v1.4s, w8
; NO-SIGNED-ZEROS-NEXT: fminnm v0.4s, v0.4s, v1.4s
; NO-SIGNED-ZEROS-NEXT: ret
entry:
%i = fptoui <4 x float> %x to <4 x i32>
%lower = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 512))
%clamped = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %lower, <4 x i32> splat (i32 1023))
%f = uitofp <4 x i32> %clamped to <4 x float>
ret <4 x float> %f
}
; Signed f32 round-trip whose only user is an ordered-equal compare with 0.0.
; Both llc invocations share one set of assertions: frintz is emitted even
; without --enable-no-signed-zeros-fp-math, followed by fcmp #0.0 and cset eq.
; Presumably the fold is allowed here because +0.0 and -0.0 compare equal, so
; the sign of a zero result cannot be observed through this user.
define i1 @test_fcmp(float %x) {
; CHECK-LABEL: test_fcmp:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%cmp = fcmp oeq float %conv2, 0.0
ret i1 %cmp
}
; Signed f32 round-trip whose only user is llvm.fabs.
; Both llc invocations share one set of assertions: frintz is emitted even
; without --enable-no-signed-zeros-fp-math, followed by fabs.
; Presumably the fold is allowed here because fabs discards the sign bit, so
; the sign of a zero result cannot be observed through this user.
define float @test_fabs(float %x) {
; CHECK-LABEL: test_fabs:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: fabs s0, s0
; CHECK-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%abs = call float @llvm.fabs.f32(float %conv2)
ret float %abs
}
; Signed f32 round-trip used as the magnitude operand of llvm.copysign, with
; the sign taken from %y.
; Both llc invocations share one set of assertions: frintz is emitted even
; without --enable-no-signed-zeros-fp-math. The copysign itself is lowered to
; a 0x7FFFFFFF mask (mvni #128, lsl #24) plus bif, which takes the sign bit
; from the register holding %y.
; Presumably the fold is allowed here because copysign overwrites the sign
; bit of the round-trip result.
define float @test_copysign(float %x, float %y) {
; CHECK-LABEL: test_copysign:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: mvni v2.4s, #128, lsl #24
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%combine = call float @llvm.copysign.f32(float %conv2, float %y)
ret float %combine
}
; Signed f32 round-trip whose only user is an fadd with the constant 1.0.
; Both llc invocations share one set of assertions: frintz is emitted even
; without --enable-no-signed-zeros-fp-math, followed by fadd with 1.0.
; Presumably the fold is allowed here because adding a non-zero constant
; gives the same result for +0.0 and -0.0.
define float @test_fadd(float %x) {
; CHECK-LABEL: test_fadd:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%add = fadd float %conv2, 1.0
ret float %add
}
; Signed f32 round-trip whose only user is an fsub of the constant 1.0.
; Both llc invocations share one set of assertions: frintz is emitted even
; without --enable-no-signed-zeros-fp-math. The subtraction is emitted as an
; fadd with the constant -1.0 rather than as an fsub instruction.
; Presumably the fold is allowed here because subtracting a non-zero constant
; gives the same result for +0.0 and -0.0.
define float @test_fsub(float %x) {
; CHECK-LABEL: test_fsub:
; CHECK: // %bb.0:
; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: fmov s1, #-1.00000000
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%conv1 = fptosi float %x to i32
%conv2 = sitofp i32 %conv1 to float
%sub = fsub float %conv2, 1.0
ret float %sub
}
; Declarations of the intrinsics called by the tests in this file:
; signed/unsigned integer min and max for i32 and <4 x i32> (the clamp
; tests), plus f32 fabs and copysign (the user-based fold tests).
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare float @llvm.fabs.f32(float)
declare float @llvm.copysign.f32(float, float)