[AArch64] Fix operation actions for FP16 vector intrinsics

Summary:
This patch changes the legalization action for some half-precision floating-
point vector intrinsics (FSIN, FLOG, etc.) from Promote to Expand, and adds
explicit Expand actions for vector FPOWI (v4f16 and v8f16). These ops are not
supported in hardware for half-precision vectors, and promotion is not always
possible (it fails for v8f16 operands). Changing the action to Expand fixes
an assertion failure in the legalizer when the frontend produces such ops.
In addition, a quick microbenchmark shows that, in the v4f16 case,
expanding introduces fewer spills and is therefore slightly faster than
promoting.

Reviewers: t.p.northover, SjoerdMeijer

Reviewed By: SjoerdMeijer

Subscribers: javed.absar, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D56296

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350825 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index c7f46a2..0e5e6d4 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -333,36 +333,38 @@
     setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
 
   setOperationAction(ISD::FREM,    MVT::f16,   Promote);
-  setOperationAction(ISD::FREM,    MVT::v4f16, Promote);
-  setOperationAction(ISD::FREM,    MVT::v8f16, Promote);
+  setOperationAction(ISD::FREM,    MVT::v4f16, Expand);
+  setOperationAction(ISD::FREM,    MVT::v8f16, Expand);
   setOperationAction(ISD::FPOW,    MVT::f16,   Promote);
-  setOperationAction(ISD::FPOW,    MVT::v4f16, Promote);
-  setOperationAction(ISD::FPOW,    MVT::v8f16, Promote);
+  setOperationAction(ISD::FPOW,    MVT::v4f16, Expand);
+  setOperationAction(ISD::FPOW,    MVT::v8f16, Expand);
   setOperationAction(ISD::FPOWI,   MVT::f16,   Promote);
+  setOperationAction(ISD::FPOWI,   MVT::v4f16, Expand);
+  setOperationAction(ISD::FPOWI,   MVT::v8f16, Expand);
   setOperationAction(ISD::FCOS,    MVT::f16,   Promote);
-  setOperationAction(ISD::FCOS,    MVT::v4f16, Promote);
-  setOperationAction(ISD::FCOS,    MVT::v8f16, Promote);
+  setOperationAction(ISD::FCOS,    MVT::v4f16, Expand);
+  setOperationAction(ISD::FCOS,    MVT::v8f16, Expand);
   setOperationAction(ISD::FSIN,    MVT::f16,   Promote);
-  setOperationAction(ISD::FSIN,    MVT::v4f16, Promote);
-  setOperationAction(ISD::FSIN,    MVT::v8f16, Promote);
+  setOperationAction(ISD::FSIN,    MVT::v4f16, Expand);
+  setOperationAction(ISD::FSIN,    MVT::v8f16, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f16,   Promote);
-  setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
-  setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
+  setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
   setOperationAction(ISD::FEXP,    MVT::f16,   Promote);
-  setOperationAction(ISD::FEXP,    MVT::v4f16, Promote);
-  setOperationAction(ISD::FEXP,    MVT::v8f16, Promote);
+  setOperationAction(ISD::FEXP,    MVT::v4f16, Expand);
+  setOperationAction(ISD::FEXP,    MVT::v8f16, Expand);
   setOperationAction(ISD::FEXP2,   MVT::f16,   Promote);
-  setOperationAction(ISD::FEXP2,   MVT::v4f16, Promote);
-  setOperationAction(ISD::FEXP2,   MVT::v8f16, Promote);
+  setOperationAction(ISD::FEXP2,   MVT::v4f16, Expand);
+  setOperationAction(ISD::FEXP2,   MVT::v8f16, Expand);
   setOperationAction(ISD::FLOG,    MVT::f16,   Promote);
-  setOperationAction(ISD::FLOG,    MVT::v4f16, Promote);
-  setOperationAction(ISD::FLOG,    MVT::v8f16, Promote);
+  setOperationAction(ISD::FLOG,    MVT::v4f16, Expand);
+  setOperationAction(ISD::FLOG,    MVT::v8f16, Expand);
   setOperationAction(ISD::FLOG2,   MVT::f16,   Promote);
-  setOperationAction(ISD::FLOG2,   MVT::v4f16, Promote);
-  setOperationAction(ISD::FLOG2,   MVT::v8f16, Promote);
+  setOperationAction(ISD::FLOG2,   MVT::v4f16, Expand);
+  setOperationAction(ISD::FLOG2,   MVT::v8f16, Expand);
   setOperationAction(ISD::FLOG10,  MVT::f16,   Promote);
-  setOperationAction(ISD::FLOG10,  MVT::v4f16, Promote);
-  setOperationAction(ISD::FLOG10,  MVT::v8f16, Promote);
+  setOperationAction(ISD::FLOG10,  MVT::v4f16, Expand);
+  setOperationAction(ISD::FLOG10,  MVT::v8f16, Expand);
 
   if (!Subtarget->hasFullFP16()) {
     setOperationAction(ISD::SELECT,      MVT::f16,  Promote);
diff --git a/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
index 4e1de87..2d7976d 100644
--- a/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -1,11 +1,325 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \
+; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP16
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \
+; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
+
+;;; Half vectors
+
+%v4f16 = type <4 x half>
+
+define %v4f16 @test_v4f16.sqrt(%v4f16 %a) {
+  ; CHECK-LABEL:          test_v4f16.sqrt:
+  ; CHECK-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           fsqrt.4h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.powi(%v4f16 %a, i32 %b) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.powi:
+  ; CHECK-COUNT-4: bl __powi
+  %1 = call %v4f16 @llvm.powi.v4f16(%v4f16 %a, i32 %b)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.sin(%v4f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.sin:
+  ; CHECK-COUNT-4: bl sinf
+  %1 = call %v4f16 @llvm.sin.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.cos(%v4f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.cos:
+  ; CHECK-COUNT-4: bl cosf
+  %1 = call %v4f16 @llvm.cos.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.pow(%v4f16 %a, %v4f16 %b) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.pow:
+  ; CHECK-COUNT-4: bl pow
+  %1 = call %v4f16 @llvm.pow.v4f16(%v4f16 %a, %v4f16 %b)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.exp(%v4f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.exp:
+  ; CHECK-COUNT-4: bl exp
+  %1 = call %v4f16 @llvm.exp.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.exp2(%v4f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.exp2:
+  ; CHECK-COUNT-4: bl exp2
+  %1 = call %v4f16 @llvm.exp2.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.log(%v4f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.log:
+  ; CHECK-COUNT-4: bl log
+  %1 = call %v4f16 @llvm.log.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.log10(%v4f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.log10:
+  ; CHECK-COUNT-4: bl log10
+  %1 = call %v4f16 @llvm.log10.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.log2(%v4f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v4f16.log2:
+  ; CHECK-COUNT-4: bl log2
+  %1 = call %v4f16 @llvm.log2.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
+  ; CHECK-LABEL:          test_v4f16.fma:
+  ; CHECK-NOFP16-COUNT-4: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           fmla.4h
+  %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.fabs(%v4f16 %a) {
+  ; CHECK-LABEL:          test_v4f16.fabs:
+  ; CHECK-NOFP16-COUNT-4: fabs s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           fabs.4h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.floor(%v4f16 %a) {
+  ; CHECK-LABEL:          test_v4f16.floor:
+  ; CHECK-NOFP16-COUNT-4: frintm s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frintm.4h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.ceil(%v4f16 %a) {
+  ; CHECK-LABEL:          test_v4f16.ceil:
+  ; CHECK-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frintp.4h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.trunc(%v4f16 %a) {
+  ; CHECK-LABEL:          test_v4f16.trunc:
+  ; CHECK-NOFP16-COUNT-4: frintz s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frintz.4h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.rint(%v4f16 %a) {
+  ; CHECK-LABEL:          test_v4f16.rint:
+  ; CHECK-NOFP16-COUNT-4: frintx s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frintx.4h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) {
+  ; CHECK-LABEL:          test_v4f16.nearbyint:
+  ; CHECK-NOFP16-COUNT-4: frinti s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frinti.4h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a)
+  ret %v4f16 %1
+}
+
+declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
+declare %v4f16 @llvm.powi.v4f16(%v4f16, i32) #0
+declare %v4f16 @llvm.sin.v4f16(%v4f16) #0
+declare %v4f16 @llvm.cos.v4f16(%v4f16) #0
+declare %v4f16 @llvm.pow.v4f16(%v4f16, %v4f16) #0
+declare %v4f16 @llvm.exp.v4f16(%v4f16) #0
+declare %v4f16 @llvm.exp2.v4f16(%v4f16) #0
+declare %v4f16 @llvm.log.v4f16(%v4f16) #0
+declare %v4f16 @llvm.log10.v4f16(%v4f16) #0
+declare %v4f16 @llvm.log2.v4f16(%v4f16) #0
+declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0
+declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0
+declare %v4f16 @llvm.floor.v4f16(%v4f16) #0
+declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0
+declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
+declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
+declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
+
+;;;
+
+%v8f16 = type <8 x half>
+
+define %v8f16 @test_v8f16.sqrt(%v8f16 %a) {
+  ; CHECK-LABEL:          test_v8f16.sqrt:
+  ; CHECK-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           fsqrt.8h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.powi(%v8f16 %a, i32 %b) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.powi:
+  ; CHECK-COUNT-8: bl __powi
+  %1 = call %v8f16 @llvm.powi.v8f16(%v8f16 %a, i32 %b)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.sin(%v8f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.sin:
+  ; CHECK-COUNT-8: bl sinf
+  %1 = call %v8f16 @llvm.sin.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.cos(%v8f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.cos:
+  ; CHECK-COUNT-8: bl cosf
+  %1 = call %v8f16 @llvm.cos.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.pow(%v8f16 %a, %v8f16 %b) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.pow:
+  ; CHECK-COUNT-8: bl pow
+  %1 = call %v8f16 @llvm.pow.v8f16(%v8f16 %a, %v8f16 %b)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.exp(%v8f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.exp:
+  ; CHECK-COUNT-8: bl exp
+  %1 = call %v8f16 @llvm.exp.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.exp2(%v8f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.exp2:
+  ; CHECK-COUNT-8: bl exp2
+  %1 = call %v8f16 @llvm.exp2.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.log(%v8f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.log:
+  ; CHECK-COUNT-8: bl log
+  %1 = call %v8f16 @llvm.log.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.log10(%v8f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.log10:
+  ; CHECK-COUNT-8: bl log10
+  %1 = call %v8f16 @llvm.log10.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.log2(%v8f16 %a) {
+  ; This operation is expanded, whether with or without +fullfp16.
+  ; CHECK-LABEL:   test_v8f16.log2:
+  ; CHECK-COUNT-8: bl log2
+  %1 = call %v8f16 @llvm.log2.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) {
+  ; CHECK-LABEL:          test_v8f16.fma:
+  ; CHECK-NOFP16-COUNT-8: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           fmla.8h
+  %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.fabs(%v8f16 %a) {
+  ; CHECK-LABEL:          test_v8f16.fabs:
+  ; CHECK-NOFP16-COUNT-8: fabs s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           fabs.8h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.floor(%v8f16 %a) {
+  ; CHECK-LABEL:          test_v8f16.floor:
+  ; CHECK-NOFP16-COUNT-8: frintm s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frintm.8h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.ceil(%v8f16 %a) {
+  ; CHECK-LABEL:          test_v8f16.ceil:
+  ; CHECK-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frintp.8h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.trunc(%v8f16 %a) {
+  ; CHECK-LABEL:          test_v8f16.trunc:
+  ; CHECK-NOFP16-COUNT-8: frintz s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frintz.8h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.rint(%v8f16 %a) {
+  ; CHECK-LABEL:          test_v8f16.rint:
+  ; CHECK-NOFP16-COUNT-8: frintx s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frintx.8h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) {
+  ; CHECK-LABEL:          test_v8f16.nearbyint:
+  ; CHECK-NOFP16-COUNT-8: frinti s{{[0-9]+}}, s{{[0-9]+}}
+  ; CHECK-FP16-NOT:       fcvt
+  ; CHECK-FP16:           frinti.8h
+  ; CHECK-FP16-NEXT:      ret
+  %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a)
+  ret %v8f16 %1
+}
+
+declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
+declare %v8f16 @llvm.powi.v8f16(%v8f16, i32) #0
+declare %v8f16 @llvm.sin.v8f16(%v8f16) #0
+declare %v8f16 @llvm.cos.v8f16(%v8f16) #0
+declare %v8f16 @llvm.pow.v8f16(%v8f16, %v8f16) #0
+declare %v8f16 @llvm.exp.v8f16(%v8f16) #0
+declare %v8f16 @llvm.exp2.v8f16(%v8f16) #0
+declare %v8f16 @llvm.log.v8f16(%v8f16) #0
+declare %v8f16 @llvm.log10.v8f16(%v8f16) #0
+declare %v8f16 @llvm.log2.v8f16(%v8f16) #0
+declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0
+declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0
+declare %v8f16 @llvm.floor.v8f16(%v8f16) #0
+declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0
+declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
+declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
+declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
 
 ;;; Float vectors
 
 %v2f32 = type <2 x float>
-%v4f16 = type <4 x half>
-%v8f16 = type <8 x half>
 
 ; CHECK-LABEL: test_v2f32.sqrt:
 define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
@@ -13,30 +327,6 @@
   %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
   ret %v2f32 %1
 }
-define %v4f16 @test_v4f16.sqrt(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.sqrt:
-; CHECK:       fsqrt s{{.}}, s{{.}}
-; CHECK:       fsqrt s{{.}}, s{{.}}
-; CHECK:       fsqrt s{{.}}, s{{.}}
-; CHECK:       fsqrt s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.sqrt:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       fsqrt.4h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.sqrt(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 fsqrt tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.sqrt:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       fsqrt.8h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
 ; CHECK: test_v2f32.powi:
 define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
   ; CHECK: pow
@@ -97,211 +387,44 @@
   %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c)
   ret %v2f32 %1
 }
-define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
-; CHECK-LABEL: test_v4f16.fma:
-; CHECK:       fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}}
-; CHECK:       fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}}
-; CHECK:       fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}}
-; CHECK:       fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.fma:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       fmla.4h
-  %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c)
-  ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) {
-; Filechecks are unwieldy with 16 fcvt and 8 fma tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.fma:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       fmla.8h
-  %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c)
-  ret %v8f16 %1
-}
 ; CHECK-LABEL: test_v2f32.fabs:
 define %v2f32 @test_v2f32.fabs(%v2f32 %a) {
   ; CHECK: fabs.2s
   %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a)
   ret %v2f32 %1
 }
-define %v4f16 @test_v4f16.fabs(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.fabs:
-; CHECK:       fabs s{{.}}, s{{.}}
-; CHECK:       fabs s{{.}}, s{{.}}
-; CHECK:       fabs s{{.}}, s{{.}}
-; CHECK:       fabs s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.fabs:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       fabs.4h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.fabs(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 fabs tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.fabs:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       fabs.8h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
 ; CHECK-LABEL: test_v2f32.floor:
 define %v2f32 @test_v2f32.floor(%v2f32 %a) {
   ; CHECK: frintm.2s
   %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a)
   ret %v2f32 %1
 }
-define %v4f16 @test_v4f16.floor(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.floor:
-; CHECK:       frintm s{{.}}, s{{.}}
-; CHECK:       frintm s{{.}}, s{{.}}
-; CHECK:       frintm s{{.}}, s{{.}}
-; CHECK:       frintm s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.floor:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       frintm.4h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.floor(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frintm tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.floor:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       frintm.8h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
 ; CHECK-LABEL: test_v2f32.ceil:
 define %v2f32 @test_v2f32.ceil(%v2f32 %a) {
   ; CHECK: frintp.2s
   %1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a)
   ret %v2f32 %1
 }
-define %v4f16 @test_v4f16.ceil(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.ceil:
-; CHECK:       frintp s{{.}}, s{{.}}
-; CHECK:       frintp s{{.}}, s{{.}}
-; CHECK:       frintp s{{.}}, s{{.}}
-; CHECK:       frintp s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.ceil:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       frintp.4h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.ceil(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.ceil:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       frintp.8h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
 ; CHECK-LABEL: test_v2f32.trunc:
 define %v2f32 @test_v2f32.trunc(%v2f32 %a) {
   ; CHECK: frintz.2s
   %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a)
   ret %v2f32 %1
 }
-define %v4f16 @test_v4f16.trunc(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.trunc:
-; CHECK:       frintz s{{.}}, s{{.}}
-; CHECK:       frintz s{{.}}, s{{.}}
-; CHECK:       frintz s{{.}}, s{{.}}
-; CHECK:       frintz s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.trunc:
-; CHECK-FP16:       frintz.4h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.trunc(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.trunc:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       frintz.8h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
 ; CHECK-LABEL: test_v2f32.rint:
 define %v2f32 @test_v2f32.rint(%v2f32 %a) {
   ; CHECK: frintx.2s
   %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a)
   ret %v2f32 %1
 }
-define %v4f16 @test_v4f16.rint(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.rint:
-; CHECK:       frintx s{{.}}, s{{.}}
-; CHECK:       frintx s{{.}}, s{{.}}
-; CHECK:       frintx s{{.}}, s{{.}}
-; CHECK:       frintx s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.rint:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       frintx.4h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.rint(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.rint:
-; CHECK-FP16:       frintx.8h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
 ; CHECK-LABEL: test_v2f32.nearbyint:
 define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) {
   ; CHECK: frinti.2s
   %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a)
   ret %v2f32 %1
 }
-define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.nearbyint:
-; CHECK:       frinti s{{.}}, s{{.}}
-; CHECK:       frinti s{{.}}, s{{.}}
-; CHECK:       frinti s{{.}}, s{{.}}
-; CHECK:       frinti s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.nearbyint:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       frinti.4h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.nearbyint:
-; CHECK-FP16-NOT:   fcvt
-; CHECK-FP16:       frinti.8h
-; CHECK-FP16-NEXT:  ret
-  %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
 
 declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
-declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
-declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
-
 declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0
 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
 declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
@@ -311,38 +434,18 @@
 declare %v2f32 @llvm.log.v2f32(%v2f32) #0
 declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
 declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
-
 declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
-declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0
-declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0
-
 declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0
-declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0
-declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0
-
 declare %v2f32 @llvm.floor.v2f32(%v2f32) #0
-declare %v4f16 @llvm.floor.v4f16(%v4f16) #0
-declare %v8f16 @llvm.floor.v8f16(%v8f16) #0
-
 declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0
-declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0
-declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0
-
 declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0
-declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
-declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
-
 declare %v2f32 @llvm.rint.v2f32(%v2f32) #0
-declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
-declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
-
 declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0
-declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
-declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
 
 ;;;
 
 %v4f32 = type <4 x float>
+
 ; CHECK: test_v4f32.sqrt:
 define %v4f32 @test_v4f32.sqrt(%v4f32 %a) {
   ; CHECK: fsqrt.4s