[AArch64] Optimize floating point materialization

This patch changes isFPImmLegal to also return true when the value can be
encoded as the immediate operand of a logical instruction, in addition to
checking whether it fits the immediate field of fmov.

This optimizes some floating-point materializations, including the values
used in isinf lowering.

Reviewed By: rengolin, efriedma, evandro

Differential Revision: https://reviews.llvm.org/D57044


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352866 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index a63ef54..8b033ee 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -405,10 +405,9 @@
   bool Is64Bit = (VT == MVT::f64);
   // This checks to see if we can use FMOV instructions to materialize
   // a constant, otherwise we have to materialize via the constant pool.
-  if (TLI.isFPImmLegal(Val, VT)) {
-    int Imm =
-        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
-    assert((Imm != -1) && "Cannot encode floating-point constant.");
+  int Imm =
+      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
+  if (Imm != -1) {
     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
   }
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 03a3dac..7198cdc 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5424,34 +5424,30 @@
 }
 
 bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
-  // FIXME: We should be able to handle f128 as well with a clever lowering.
-  if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
-                          (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
-    LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
-    return true;
-  }
-
   bool IsLegal = false;
-  SmallString<128> ImmStrVal;
-  Imm.toString(ImmStrVal);
-
+  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
+  // 16-bit case when target has full fp16 support.
+  // FIXME: We should be able to handle f128 as well with a clever lowering.
+  const APInt ImmInt = Imm.bitcastToAPInt();
   if (VT == MVT::f64)
-    IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
+    IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
   else if (VT == MVT::f32)
-    IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
+    IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
   else if (VT == MVT::f16 && Subtarget->hasFullFP16())
-    IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
+    IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
+  // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
+  //       generate that fmov.
 
-  if (IsLegal) {
-    LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
-                      << " imm value: " << ImmStrVal << "\n");
-    return true;
-  }
+  // If we can not materialize in immediate field for fmov, check if the
+  // value can be encoded as the immediate operand of a logical instruction.
+  // The immediate value will be created with either MOVZ, MOVN, or ORR.
+  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32))
+    IsLegal = AArch64_AM::isAnyMOVWMovAlias(ImmInt.getZExtValue(),
+                                            VT.getSizeInBits());
 
-  LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
-                    << " imm value: " << ImmStrVal << "\n");
-  return false;
+  LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
+                    << " imm value: "; Imm.dump(););
+  return IsLegal;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/AArch64/fabs.ll b/test/CodeGen/AArch64/fabs.ll
index cd315ab..58f047a 100644
--- a/test/CodeGen/AArch64/fabs.ll
+++ b/test/CodeGen/AArch64/fabs.ll
@@ -22,11 +22,11 @@
 define float @still_not_fabs(float %x) #0 {
 ; CHECK-LABEL: still_not_fabs:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    ldr s1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT:    fneg s2, s0
-; CHECK-NEXT:    fcmp s0, s1
-; CHECK-NEXT:    fcsel s0, s0, s2, ge
+; CHECK-NEXT:    orr w8, wzr, #0x80000000
+; CHECK-NEXT:    fmov s2, w8
+; CHECK-NEXT:    fneg s1, s0
+; CHECK-NEXT:    fcmp s0, s2
+; CHECK-NEXT:    fcsel s0, s0, s1, ge
 ; CHECK-NEXT:    ret
   %cmp = fcmp nnan oge float %x, -0.0
   %sub = fsub nnan float -0.0, %x
diff --git a/test/CodeGen/AArch64/fadd-combines.ll b/test/CodeGen/AArch64/fadd-combines.ll
index 7332101..a204684 100644
--- a/test/CodeGen/AArch64/fadd-combines.ll
+++ b/test/CodeGen/AArch64/fadd-combines.ll
@@ -132,13 +132,13 @@
 define float @fadd_const_multiuse_fmf(float %x) {
 ; CHECK-LABEL: fadd_const_multiuse_fmf:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI10_0
-; CHECK-NEXT:    adrp x9, .LCPI10_1
-; CHECK-NEXT:    ldr s1, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT:    ldr s2, [x9, :lo12:.LCPI10_1]
-; CHECK-NEXT:    fadd s1, s0, s1
-; CHECK-NEXT:    fadd s0, s0, s2
-; CHECK-NEXT:    fadd s0, s1, s0
+; CHECK-DAG:     mov  [[W59:w[0-9]+]], #1114374144
+; CHECK-DAG:     mov  [[W42:w[0-9]+]], #1109917696
+; CHECK-DAG:     fmov [[FP59:s[0-9]+]], [[W59]]
+; CHECK-DAG:     fmov [[FP42:s[0-9]+]], [[W42]]
+; CHECK-NEXT:    fadd [[TMP1:s[0-9]+]], s0, [[FP42]]
+; CHECK-NEXT:    fadd [[TMP2:s[0-9]+]], s0, [[FP59]]
+; CHECK-NEXT:    fadd s0, [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret
   %a1 = fadd float %x, 42.0
   %a2 = fadd nsz reassoc float %a1, 17.0
@@ -153,13 +153,13 @@
 define float @fadd_const_multiuse_attr(float %x) #0 {
 ; CHECK-LABEL: fadd_const_multiuse_attr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, .LCPI11_1
-; CHECK-NEXT:    adrp x8, .LCPI11_0
-; CHECK-NEXT:    ldr s1, [x9, :lo12:.LCPI11_1]
-; CHECK-NEXT:    ldr s2, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT:    fadd s1, s0, s1
-; CHECK-NEXT:    fadd s1, s2, s1
-; CHECK-NEXT:    fadd s0, s0, s1
+; CHECK-DAG:     mov  [[W59:w[0-9]+]], #1114374144
+; CHECK-DAG:     mov  [[W17:w[0-9]+]], #1109917696
+; CHECK-NEXT:    fmov [[FP59:s[0-9]+]], [[W59]]
+; CHECK-NEXT:    fmov [[FP17:s[0-9]+]], [[W17]]
+; CHECK-NEXT:    fadd [[TMP1:s[0-9]+]], s0, [[FP59]]
+; CHECK-NEXT:    fadd [[TMP2:s[0-9]+]], [[FP17]], [[TMP1]]
+; CHECK-NEXT:    fadd s0, s0, [[TMP2]]
 ; CHECK-NEXT:    ret
   %a1 = fadd float %x, 42.0
   %a2 = fadd float %a1, 17.0
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
index de15bc0..a09d5d0 100644
--- a/test/CodeGen/AArch64/fpimm.ll
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -18,8 +18,10 @@
 
   %newval2 = fadd float %val, 128.0
   store volatile float %newval2, float* @varf32
-; CHECK-DAG: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI0_0
-; TINY-DAG: ldr {{s[0-9]+}}, .LCPI0_0
+; CHECK-DAG: mov [[W128:w[0-9]+]], #1124073472
+; CHECK-DAG: fmov {{s[0-9]+}}, [[W128]]
+; TINY-DAG: mov [[W128:w[0-9]+]], #1124073472
+; TINY-DAG: fmov {{s[0-9]+}}, [[W128]]
 
 ; CHECK: ret
 ; TINY: ret
@@ -38,8 +40,10 @@
 
   %newval2 = fadd double %val, 128.0
   store volatile double %newval2, double* @varf64
-; CHECK-DAG: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI1_0
-; TINY-DAG: ldr {{d[0-9]+}}, .LCPI1_0
+; CHECK-DAG: mov [[X128:x[0-9]+]], #4638707616191610880
+; CHECK-DAG: fmov {{d[0-9]+}}, [[X128]]
+; TINY-DAG: mov [[X128:x[0-9]+]], #4638707616191610880
+; TINY-DAG: fmov {{d[0-9]+}}, [[X128]]
 
 ; CHECK: ret
 ; TINY: ret
diff --git a/test/CodeGen/AArch64/isinf.ll b/test/CodeGen/AArch64/isinf.ll
new file mode 100644
index 0000000..e4607d0
--- /dev/null
+++ b/test/CodeGen/AArch64/isinf.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 < %s -o -| FileCheck %s
+
+declare half   @llvm.fabs.f16(half)
+declare float  @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128  @llvm.fabs.f128(fp128)
+
+; INFINITY requires loading the constant for _Float16
+define i32 @replace_isinf_call_f16(half %x) {
+; CHECK-LABEL: replace_isinf_call_f16:
+; CHECK:       adrp    [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
+; CHECK:       ldr     [[INFINITY:h[0-9]+]], {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
+; CHECK-NEXT:  fabs    [[ABS:h[0-9]+]], h0
+; CHECK-NEXT:  fcmp    [[ABS]], [[INFINITY]]
+; CHECK-NEXT:  cset    w0, eq
+  %abs = tail call half @llvm.fabs.f16(half %x)
+  %cmpinf = fcmp oeq half %abs, 0xH7C00
+  %ret = zext i1 %cmpinf to i32
+  ret i32 %ret
+}
+
+; Check if INFINITY for float is materialized
+define i32 @replace_isinf_call_f32(float %x) {
+; CHECK-LABEL: replace_isinf_call_f32:
+; CHECK:       orr    [[INFSCALARREG:w[0-9]+]], wzr, #0x7f800000
+; CHECK-NEXT:  fabs   [[ABS:s[0-9]+]], s0
+; CHECK-NEXT:  fmov   [[INFREG:s[0-9]+]], [[INFSCALARREG]]
+; CHECK-NEXT:  fcmp   [[ABS]], [[INFREG]]
+; CHECK-NEXT:  cset   w0, eq
+  %abs = tail call float @llvm.fabs.f32(float %x)
+  %cmpinf = fcmp oeq float %abs, 0x7FF0000000000000
+  %ret = zext i1 %cmpinf to i32
+  ret i32 %ret
+}
+
+; Check if INFINITY for double is materialized
+define i32 @replace_isinf_call_f64(double %x) {
+; CHECK-LABEL: replace_isinf_call_f64:
+; CHECK:       orr    [[INFSCALARREG:x[0-9]+]], xzr, #0x7ff0000000000000
+; CHECK-NEXT:  fabs   [[ABS:d[0-9]+]], d0
+; CHECK-NEXT:  fmov   [[INFREG:d[0-9]+]], [[INFSCALARREG]]
+; CHECK-NEXT:  fcmp   [[ABS]], [[INFREG]]
+; CHECK-NEXT:  cset   w0, eq
+  %abs = tail call double @llvm.fabs.f64(double %x)
+  %cmpinf = fcmp oeq double %abs, 0x7FF0000000000000
+  %ret = zext i1 %cmpinf to i32
+  ret i32 %ret
+}
+
+; For long double it still requires loading the constant.
+define i32 @replace_isinf_call_f128(fp128 %x) {
+; CHECK-LABEL: replace_isinf_call_f128:
+; CHECK:       adrp    [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
+; CHECK:       ldr     q1, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
+; CHECK:       bl      __eqtf2
+; CHECK:       cmp     w0, #0
+; CHECK:       cset    w0, eq
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
+  %cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+  %ret = zext i1 %cmpinf to i32
+  ret i32 %ret
+}
diff --git a/test/CodeGen/AArch64/known-never-nan.ll b/test/CodeGen/AArch64/known-never-nan.ll
index 1ffbb9b..ef9fa5f 100644
--- a/test/CodeGen/AArch64/known-never-nan.ll
+++ b/test/CodeGen/AArch64/known-never-nan.ll
@@ -28,13 +28,13 @@
 define float @not_fmaxnm_maybe_nan(i32 %i1, i32 %i2) #0 {
 ; CHECK-LABEL: not_fmaxnm_maybe_nan:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    ldr s0, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT:    ucvtf s1, w0
-; CHECK-NEXT:    ucvtf s2, w1
-; CHECK-NEXT:    fmov s3, #17.00000000
-; CHECK-NEXT:    fmul s0, s1, s0
-; CHECK-NEXT:    fadd s1, s2, s3
+; CHECK-NEXT:    orr w8, wzr, #0xff800000
+; CHECK-NEXT:    ucvtf s0, w0
+; CHECK-NEXT:    ucvtf s1, w1
+; CHECK-NEXT:    fmov s2, #17.00000000
+; CHECK-NEXT:    fmov s3, w8
+; CHECK-NEXT:    fmul s0, s0, s3
+; CHECK-NEXT:    fadd s1, s1, s2
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s0, s1, pl
 ; CHECK-NEXT:    ret
diff --git a/test/CodeGen/AArch64/literal_pools_float.ll b/test/CodeGen/AArch64/literal_pools_float.ll
index 3c70634..1a67394 100644
--- a/test/CodeGen/AArch64/literal_pools_float.ll
+++ b/test/CodeGen/AArch64/literal_pools_float.ll
@@ -13,18 +13,16 @@
 
   %floatval = load float, float* @varfloat
   %newfloat = fadd float %floatval, 128.0
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
-; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
+; CHECK: mov [[W128:w[0-9]+]], #1124073472
+; CHECK: fmov [[LIT128:s[0-9]+]], [[W128]]
 ; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
-; CHECK-TINY: ldr [[LIT128:s[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
+; CHECK-TINY: mov [[W128:w[0-9]+]], #1124073472
+; CHECK-TINY: fmov [[LIT128:s[0-9]+]], [[W128]]
 ; CHECK-NOFP-TINY-NOT: ldr {{s[0-9]+}},
 
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:.LCPI[0-9]+_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g3:[[CURLIT]]
-; CHECK-LARGE: ldr {{s[0-9]+}}, [x[[LITADDR]]]
+; CHECK-LARGE: mov [[W128:w[0-9]+]], #1124073472
+; CHECK-LARGE: fmov [[LIT128:s[0-9]+]], [[W128]]
 ; CHECK-LARGE: fadd
 ; CHECK-NOFP-LARGE-NOT: ldr {{s[0-9]+}},
 ; CHECK-NOFP-LARGE-NOT: fadd
diff --git a/test/CodeGen/AArch64/win_cst_pool.ll b/test/CodeGen/AArch64/win_cst_pool.ll
index 5bcc919..f26e7aa 100644
--- a/test/CodeGen/AArch64/win_cst_pool.ll
+++ b/test/CodeGen/AArch64/win_cst_pool.ll
@@ -2,22 +2,22 @@
 ; RUN: llc < %s -mtriple=aarch64-win32-gnu | FileCheck -check-prefix=MINGW %s
 
 define double @double() {
-  ret double 0x0000000000800000
+  ret double 0x0000000000800001
 }
-; CHECK:              .globl  __real@0000000000800000
-; CHECK-NEXT:         .section        .rdata,"dr",discard,__real@0000000000800000
+; CHECK:              .globl  __real@0000000000800001
+; CHECK-NEXT:         .section        .rdata,"dr",discard,__real@0000000000800001
 ; CHECK-NEXT:         .p2align  3
-; CHECK-NEXT: __real@0000000000800000:
-; CHECK-NEXT:         .xword   8388608
+; CHECK-NEXT: __real@0000000000800001:
+; CHECK-NEXT:         .xword   8388609
 ; CHECK:      double:
-; CHECK:               adrp    x8, __real@0000000000800000
-; CHECK-NEXT:          ldr     d0, [x8, __real@0000000000800000]
+; CHECK:               adrp    x8, __real@0000000000800001
+; CHECK-NEXT:          ldr     d0, [x8, __real@0000000000800001]
 ; CHECK-NEXT:          ret
 
 ; MINGW:              .section        .rdata,"dr"
 ; MINGW-NEXT:         .p2align  3
 ; MINGW-NEXT: [[LABEL:\.LC.*]]:
-; MINGW-NEXT:         .xword   8388608
+; MINGW-NEXT:         .xword   8388609
 ; MINGW:      double:
 ; MINGW:               adrp    x8, [[LABEL]]
 ; MINGW-NEXT:          ldr     d0, [x8, [[LABEL]]]