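[X86][FastISel] Support EVEX (AVX-512) versions of scalar sqrt.

When the subtarget has AVX-512, select VSQRTSSZr/VSQRTSDZr for f32/f64 sqrt
in FastISel instead of the VEX-encoded VSQRTSSr/VSQRTSDr. The opcode table is
now indexed by AVX level (SSE, AVX, AVX-512) first and by type second, and the
register class is obtained from TLI.getRegClassFor(VT) rather than being
hard-coded per type.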
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336939 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 4199eb2..d65d81b 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2799,17 +2799,19 @@
// Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
// is not generated by FastISel yet.
// FIXME: Update this code once tablegen can handle it.
- static const uint16_t SqrtOpc[2][2] = {
- {X86::SQRTSSr, X86::VSQRTSSr},
- {X86::SQRTSDr, X86::VSQRTSDr}
+ static const uint16_t SqrtOpc[3][2] = {
+ { X86::SQRTSSr, X86::SQRTSDr },
+ { X86::VSQRTSSr, X86::VSQRTSDr },
+ { X86::VSQRTSSZr, X86::VSQRTSDZr },
};
- bool HasAVX = Subtarget->hasAVX();
+ unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
+ Subtarget->hasAVX() ? 1 :
+ 0;
unsigned Opc;
- const TargetRegisterClass *RC;
switch (VT.SimpleTy) {
default: return false;
- case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
- case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+ case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
+ case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
}
const Value *SrcVal = II->getArgOperand(0);
@@ -2818,8 +2820,9 @@
if (SrcReg == 0)
return false;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned ImplicitDefReg = 0;
- if (HasAVX) {
+ if (AVXLevel > 0) {
ImplicitDefReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 1b54562..fce52bf 100644
--- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -2614,10 +2614,15 @@
; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
-; X64-AVX-LABEL: test_mm_sqrt_ss_scalar:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
-; X64-AVX-NEXT: retq # encoding: [0xc3]
+; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
+; X64-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
+; X64-AVX512-NEXT: retq # encoding: [0xc3]
%sqrt = call float @llvm.sqrt.f32(float %a0)
ret float %sqrt
}
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 03acbaa..be38989 100644
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -4959,10 +4959,15 @@
; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
-; X64-AVX-LABEL: test_mm_sqrt_sd_scalar:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
-; X64-AVX-NEXT: retq # encoding: [0xc3]
+; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
+; X64-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
+; X64-AVX512-NEXT: retq # encoding: [0xc3]
%sqrt = call double @llvm.sqrt.f64(double %a0)
ret double %sqrt
}