[X86] Don't prevent load folding for cvtsi2ss/cvtsi2sd based on hasPartialRegUpdate. Preventing the load fold won't fix the partial register update since the input we can fold is a GPR. So it will do nothing to prevent a false dependency on an XMM register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354193 91177308-0d34-0410-b5e6-96231b3b80d8

commit: df61a5888f8a498e04446fa5e46fc0bb66227757 [log] [tgz]
author: Craig Topper <craig.topper@intel.com> Sat Feb 16 03:34:54 2019 +0000
committer: Craig Topper <craig.topper@intel.com> Sat Feb 16 03:34:54 2019 +0000
tree: e6aaa72b3febc9880fa852f3713d2ef7db65cf88
parent: 2cb3057fcbfb9dd916744c01092c963b4fb4933a [diff]
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index db55d50..b335976 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp

@@ -4239,7 +4239,8 @@
 /// FIXME: This should be turned into a TSFlags.
 ///
 static bool hasPartialRegUpdate(unsigned Opcode,
-                                const X86Subtarget &Subtarget) {
+                                const X86Subtarget &Subtarget,
+                                bool ForLoadFold = false) {
   switch (Opcode) {
   case X86::CVTSI2SSrr:
   case X86::CVTSI2SSrm:
@@ -4249,6 +4250,9 @@
   case X86::CVTSI2SDrm:
   case X86::CVTSI642SDrr:
   case X86::CVTSI642SDrm:
+    // Load folding won't affect the partial register update since the input is
+    // a GPR.
+    return !ForLoadFold;
   case X86::CVTSD2SSrr:
   case X86::CVTSD2SSrm:
   case X86::CVTSS2SDrr:
@@ -4325,7 +4329,7 @@
 
 // Return true for any instruction the copies the high bits of the first source
 // operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode) {
+static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
   switch (Opcode) {
   case X86::VCVTSI2SSrr:
   case X86::VCVTSI2SSrm:
@@ -4343,38 +4347,6 @@
   case X86::VCVTSI642SDrm:
   case X86::VCVTSI642SDrr_Int:
   case X86::VCVTSI642SDrm_Int:
-  case X86::VCVTSD2SSrr:
-  case X86::VCVTSD2SSrm:
-  case X86::VCVTSD2SSrr_Int:
-  case X86::VCVTSD2SSrm_Int:
-  case X86::VCVTSS2SDrr:
-  case X86::VCVTSS2SDrm:
-  case X86::VCVTSS2SDrr_Int:
-  case X86::VCVTSS2SDrm_Int:
-  case X86::VRCPSSr:
-  case X86::VRCPSSr_Int:
-  case X86::VRCPSSm:
-  case X86::VRCPSSm_Int:
-  case X86::VROUNDSDr:
-  case X86::VROUNDSDm:
-  case X86::VROUNDSDr_Int:
-  case X86::VROUNDSDm_Int:
-  case X86::VROUNDSSr:
-  case X86::VROUNDSSm:
-  case X86::VROUNDSSr_Int:
-  case X86::VROUNDSSm_Int:
-  case X86::VRSQRTSSr:
-  case X86::VRSQRTSSr_Int:
-  case X86::VRSQRTSSm:
-  case X86::VRSQRTSSm_Int:
-  case X86::VSQRTSSr:
-  case X86::VSQRTSSr_Int:
-  case X86::VSQRTSSm:
-  case X86::VSQRTSSm_Int:
-  case X86::VSQRTSDr:
-  case X86::VSQRTSDr_Int:
-  case X86::VSQRTSDm:
-  case X86::VSQRTSDm_Int:
   // AVX-512
   case X86::VCVTSI2SSZrr:
   case X86::VCVTSI2SSZrm:
@@ -4415,6 +4387,42 @@
   case X86::VCVTUSI642SDZrr_Int:
   case X86::VCVTUSI642SDZrrb_Int:
   case X86::VCVTUSI642SDZrm_Int:
+    // Load folding won't affect the undef register update since the input is
+    // a GPR.
+    return !ForLoadFold;
+  case X86::VCVTSD2SSrr:
+  case X86::VCVTSD2SSrm:
+  case X86::VCVTSD2SSrr_Int:
+  case X86::VCVTSD2SSrm_Int:
+  case X86::VCVTSS2SDrr:
+  case X86::VCVTSS2SDrm:
+  case X86::VCVTSS2SDrr_Int:
+  case X86::VCVTSS2SDrm_Int:
+  case X86::VRCPSSr:
+  case X86::VRCPSSr_Int:
+  case X86::VRCPSSm:
+  case X86::VRCPSSm_Int:
+  case X86::VROUNDSDr:
+  case X86::VROUNDSDm:
+  case X86::VROUNDSDr_Int:
+  case X86::VROUNDSDm_Int:
+  case X86::VROUNDSSr:
+  case X86::VROUNDSSm:
+  case X86::VROUNDSSr_Int:
+  case X86::VROUNDSSm_Int:
+  case X86::VRSQRTSSr:
+  case X86::VRSQRTSSr_Int:
+  case X86::VRSQRTSSm:
+  case X86::VRSQRTSSm_Int:
+  case X86::VSQRTSSr:
+  case X86::VSQRTSSr_Int:
+  case X86::VSQRTSSm:
+  case X86::VSQRTSSm_Int:
+  case X86::VSQRTSDr:
+  case X86::VSQRTSDr_Int:
+  case X86::VSQRTSDm:
+  case X86::VSQRTSDm_Int:
+  // AVX-512
   case X86::VCVTSD2SSZrr:
   case X86::VCVTSD2SSZrr_Int:
   case X86::VCVTSD2SSZrrb_Int:
@@ -4735,8 +4743,9 @@
   return nullptr;
 }
 
-static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) {
-  if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
+static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
+                                               MachineInstr &MI) {
+  if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) ||
       !MI.getOperand(1).isReg())
     return false;
 
@@ -4772,7 +4781,7 @@
 
   // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
        shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
@@ -4940,7 +4949,7 @@
 
   // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
        shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
@@ -5140,7 +5149,7 @@
 
   // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
        shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 

diff --git a/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll b/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
index 469b5e5..5ba47bd 100644
--- a/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll

@@ -22,14 +22,12 @@
 define double @long_to_double_rm(i64* %a) {
 ; SSE2-LABEL: long_to_double_rm:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movq (%rdi), %rax
-; SSE2-NEXT:    cvtsi2sdq %rax, %xmm0
+; SSE2-NEXT:    cvtsi2sdq (%rdi), %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: long_to_double_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    movq (%rdi), %rax
-; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm0
+; AVX-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %0 = load i64, i64* %a
@@ -71,14 +69,12 @@
 define float @long_to_float_rm(i64* %a) {
 ; SSE2-LABEL: long_to_float_rm:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movq (%rdi), %rax
-; SSE2-NEXT:    cvtsi2ssq %rax, %xmm0
+; SSE2-NEXT:    cvtsi2ssq (%rdi), %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: long_to_float_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    movq (%rdi), %rax
-; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %0 = load i64, i64* %a

diff --git a/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/test/CodeGen/X86/fast-isel-int-float-conversion.ll
index fbaa86a..7ba8ac1 100644
--- a/test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ b/test/CodeGen/X86/fast-isel-int-float-conversion.ll

@@ -27,8 +27,7 @@
 ; SSE2_X86-NEXT:    .cfi_def_cfa_register %ebp
 ; SSE2_X86-NEXT:    andl $-8, %esp
 ; SSE2_X86-NEXT:    subl $8, %esp
-; SSE2_X86-NEXT:    movl 8(%ebp), %eax
-; SSE2_X86-NEXT:    cvtsi2sdl %eax, %xmm0
+; SSE2_X86-NEXT:    cvtsi2sdl 8(%ebp), %xmm0
 ; SSE2_X86-NEXT:    movsd %xmm0, (%esp)
 ; SSE2_X86-NEXT:    fldl (%esp)
 ; SSE2_X86-NEXT:    movl %ebp, %esp
@@ -45,8 +44,7 @@
 ; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
 ; AVX_X86-NEXT:    andl $-8, %esp
 ; AVX_X86-NEXT:    subl $8, %esp
-; AVX_X86-NEXT:    movl 8(%ebp), %eax
-; AVX_X86-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT:    vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
 ; AVX_X86-NEXT:    fldl (%esp)
 ; AVX_X86-NEXT:    movl %ebp, %esp
@@ -61,14 +59,12 @@
 define double @int_to_double_rm(i32* %a) {
 ; SSE2-LABEL: int_to_double_rm:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movl (%rdi), %eax
-; SSE2-NEXT:    cvtsi2sdl %eax, %xmm0
+; SSE2-NEXT:    cvtsi2sdl (%rdi), %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: int_to_double_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    movl (%rdi), %eax
-; AVX-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; SSE2_X86-LABEL: int_to_double_rm:
@@ -179,8 +175,7 @@
 ; SSE2_X86:       # %bb.0: # %entry
 ; SSE2_X86-NEXT:    pushl %eax
 ; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
-; SSE2_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; SSE2_X86-NEXT:    cvtsi2ssl %eax, %xmm0
+; SSE2_X86-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
 ; SSE2_X86-NEXT:    movss %xmm0, (%esp)
 ; SSE2_X86-NEXT:    flds (%esp)
 ; SSE2_X86-NEXT:    popl %eax
@@ -191,8 +186,7 @@
 ; AVX_X86:       # %bb.0: # %entry
 ; AVX_X86-NEXT:    pushl %eax
 ; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
-; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX_X86-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT:    flds (%esp)
 ; AVX_X86-NEXT:    popl %eax
@@ -206,14 +200,12 @@
 define float @int_to_float_rm(i32* %a) {
 ; SSE2-LABEL: int_to_float_rm:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movl (%rdi), %eax
-; SSE2-NEXT:    cvtsi2ssl %eax, %xmm0
+; SSE2-NEXT:    cvtsi2ssl (%rdi), %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: int_to_float_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    movl (%rdi), %eax
-; AVX-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; SSE2_X86-LABEL: int_to_float_rm:

diff --git a/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll b/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
index 60d2903..22d8aa7 100644
--- a/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll

@@ -15,8 +15,7 @@
 define double @long_to_double_rm(i64* %a) {
 ; ALL-LABEL: long_to_double_rm:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    movq (%rdi), %rax
-; ALL-NEXT:    vcvtusi2sdq %rax, %xmm0, %xmm0
+; ALL-NEXT:    vcvtusi2sdq (%rdi), %xmm0, %xmm0
 ; ALL-NEXT:    retq
 entry:
   %0 = load i64, i64* %a
@@ -48,8 +47,7 @@
 define float @long_to_float_rm(i64* %a) {
 ; ALL-LABEL: long_to_float_rm:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    movq (%rdi), %rax
-; ALL-NEXT:    vcvtusi2ssq %rax, %xmm0, %xmm0
+; ALL-NEXT:    vcvtusi2ssq (%rdi), %xmm0, %xmm0
 ; ALL-NEXT:    retq
 entry:
   %0 = load i64, i64* %a

diff --git a/test/CodeGen/X86/fast-isel-uint-float-conversion.ll b/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
index 6aad161..f883ac1 100644
--- a/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
+++ b/test/CodeGen/X86/fast-isel-uint-float-conversion.ll

@@ -18,8 +18,7 @@
 ; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
 ; AVX_X86-NEXT:    andl $-8, %esp
 ; AVX_X86-NEXT:    subl $8, %esp
-; AVX_X86-NEXT:    movl 8(%ebp), %eax
-; AVX_X86-NEXT:    vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT:    vcvtusi2sdl 8(%ebp), %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
 ; AVX_X86-NEXT:    fldl (%esp)
 ; AVX_X86-NEXT:    movl %ebp, %esp
@@ -34,8 +33,7 @@
 define double @int_to_double_rm(i32* %a) {
 ; AVX-LABEL: int_to_double_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    movl (%rdi), %eax
-; AVX-NEXT:    vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvtusi2sdl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX_X86-LABEL: int_to_double_rm:
@@ -100,8 +98,7 @@
 ; AVX_X86:       # %bb.0: # %entry
 ; AVX_X86-NEXT:    pushl %eax
 ; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
-; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX_X86-NEXT:    vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT:    vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT:    flds (%esp)
 ; AVX_X86-NEXT:    popl %eax
@@ -115,8 +112,7 @@
 define float @int_to_float_rm(i32* %a) {
 ; AVX-LABEL: int_to_float_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    movl (%rdi), %eax
-; AVX-NEXT:    vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvtusi2ssl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX_X86-LABEL: int_to_float_rm:

diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll
index 0d903ef..cbeeb04 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll

@@ -577,8 +577,7 @@
 }
 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
 
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
+define double @stack_fold_cvtsi2sd(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2sd
   ;CHECK:  vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -586,8 +585,7 @@
   ret double %2
 }
 
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2sd_int
   ;CHECK:  vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -596,8 +594,7 @@
   ret <2 x double> %3
 }
 
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
+define double @stack_fold_cvtsi642sd(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642sd
   ;CHECK:  vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -605,8 +602,7 @@
   ret double %2
 }
 
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642sd_int
   ;CHECK:  vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -615,8 +611,7 @@
   ret <2 x double> %3
 }
 
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
+define float @stack_fold_cvtsi2ss(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2ss
   ;CHECK:  vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -624,8 +619,7 @@
   ret float %2
 }
 
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2ss_int
   ;CHECK:  vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -634,8 +628,7 @@
   ret <4 x float> %3
 }
 
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
+define float @stack_fold_cvtsi642ss(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642ss
   ;CHECK:  vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -643,8 +636,7 @@
   ret float %2
 }
 
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642ss_int
   ;CHECK:  vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()

diff --git a/test/CodeGen/X86/stack-folding-fp-sse42.ll b/test/CodeGen/X86/stack-folding-fp-sse42.ll
index 4599c4d..37f235c 100644
--- a/test/CodeGen/X86/stack-folding-fp-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-fp-sse42.ll

@@ -357,7 +357,7 @@
 }
 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
 
-define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
+define double @stack_fold_cvtsi2sd(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2sd
   ;CHECK:       cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -374,7 +374,7 @@
   ret <2 x double> %3
 }
 
-define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
+define double @stack_fold_cvtsi642sd(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642sd
   ;CHECK:       cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -391,7 +391,7 @@
   ret <2 x double> %3
 }
 
-define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
+define float @stack_fold_cvtsi2ss(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2ss
   ;CHECK:       cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -408,7 +408,7 @@
   ret <4 x float> %3
 }
 
-define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
+define float @stack_fold_cvtsi642ss(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642ss
   ;CHECK:       cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
commit	df61a5888f8a498e04446fa5e46fc0bb66227757	[log] [tgz]
author	Craig Topper <craig.topper@intel.com>	Sat Feb 16 03:34:54 2019 +0000
committer	Craig Topper <craig.topper@intel.com>	Sat Feb 16 03:34:54 2019 +0000
tree	e6aaa72b3febc9880fa852f3713d2ef7db65cf88
parent	2cb3057fcbfb9dd916744c01092c963b4fb4933a [diff]