[X86] Add 64-bit int to float/double conversion with AVX to X86FastISel::X86SelectSIToFP Summary: [X86] Teach fast isel to handle i64 sitofp with AVX. For some reason we only handled i32 sitofp with AVX, even though we already support i64 when only SSE is available, so we should do the same with AVX. Also add i686 command lines for the 32-bit tests. The 64-bit tests are in a separate file to avoid a fast-isel abort failure in 32-bit mode. Reviewers: RKSimon, zvi Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39450 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317102 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index c248fb3..9ea7590 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp
@@ -2410,7 +2410,8 @@ if (!Subtarget->hasAVX()) return false; - if (!I->getOperand(0)->getType()->isIntegerTy(32)) + Type *InTy = I->getOperand(0)->getType(); + if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64)) return false; // Select integer to float/double conversion. @@ -2423,11 +2424,11 @@ if (I->getType()->isDoubleTy()) { // sitofp int -> double - Opcode = X86::VCVTSI2SDrr; + Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SD64rr : X86::VCVTSI2SDrr; RC = &X86::FR64RegClass; } else if (I->getType()->isFloatTy()) { // sitofp int -> float - Opcode = X86::VCVTSI2SSrr; + Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SS64rr : X86::VCVTSI2SSrr; RC = &X86::FR32RegClass; } else return false;
diff --git a/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll b/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll new file mode 100644 index 0000000..432e190 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
@@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX + + +define double @long_to_double_rr(i64 %a) { +; SSE2-LABEL: long_to_double_rr: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2sdq %rdi, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_double_rr: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = sitofp i64 %a to double + ret double %0 +} + +define double @long_to_double_rm(i64* %a) { +; SSE2-LABEL: long_to_double_rm: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_double_rm: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = sitofp i64 %0 to double + ret double %1 +} + +define float @long_to_float_rr(i64 %a) { +; SSE2-LABEL: long_to_float_rr: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2ssq %rdi, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_float_rr: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = sitofp i64 %a to float + ret float %0 +} + +define float @long_to_float_rm(i64* %a) { +; SSE2-LABEL: long_to_float_rm: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_float_rm: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = sitofp i64 %0 to float + ret float %1 +}
diff --git a/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/test/CodeGen/X86/fast-isel-int-float-conversion.ll index bc8f14f..2286fb9 100644 --- a/test/CodeGen/X86/fast-isel-int-float-conversion.ll +++ b/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 -; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2 +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX +; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2_X86 +; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX_X86 define double @int_to_double_rr(i32 %a) { @@ -13,6 +15,39 @@ ; AVX: # BB#0: # %entry ; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_double_rr: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: .cfi_offset %ebp, -8 +; SSE2_X86-NEXT: movl %esp, %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp +; SSE2_X86-NEXT: andl $-8, %esp +; SSE2_X86-NEXT: subl $8, %esp +; SSE2_X86-NEXT: movl 8(%ebp), %eax +; SSE2_X86-NEXT: cvtsi2sdl %eax, %xmm0 +; SSE2_X86-NEXT: movsd %xmm0, (%esp) +; SSE2_X86-NEXT: fldl (%esp) +; SSE2_X86-NEXT: movl %ebp, %esp +; SSE2_X86-NEXT: popl %ebp +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_double_rr: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %ebp +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: .cfi_offset %ebp, -8 +; AVX_X86-NEXT: movl %esp, %ebp +; AVX_X86-NEXT: .cfi_def_cfa_register %ebp +; AVX_X86-NEXT: andl $-8, %esp +; AVX_X86-NEXT: subl $8, %esp +; 
AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovsd %xmm0, (%esp) +; AVX_X86-NEXT: fldl (%esp) +; AVX_X86-NEXT: movl %ebp, %esp +; AVX_X86-NEXT: popl %ebp +; AVX_X86-NEXT: retl entry: %0 = sitofp i32 %a to double ret double %0 @@ -28,6 +63,40 @@ ; AVX: # BB#0: # %entry ; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_double_rm: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: .cfi_offset %ebp, -8 +; SSE2_X86-NEXT: movl %esp, %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp +; SSE2_X86-NEXT: andl $-8, %esp +; SSE2_X86-NEXT: subl $8, %esp +; SSE2_X86-NEXT: movl 8(%ebp), %eax +; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0 +; SSE2_X86-NEXT: movsd %xmm0, (%esp) +; SSE2_X86-NEXT: fldl (%esp) +; SSE2_X86-NEXT: movl %ebp, %esp +; SSE2_X86-NEXT: popl %ebp +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_double_rm: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %ebp +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: .cfi_offset %ebp, -8 +; AVX_X86-NEXT: movl %esp, %ebp +; AVX_X86-NEXT: .cfi_def_cfa_register %ebp +; AVX_X86-NEXT: andl $-8, %esp +; AVX_X86-NEXT: subl $8, %esp +; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovsd %xmm0, (%esp) +; AVX_X86-NEXT: fldl (%esp) +; AVX_X86-NEXT: movl %ebp, %esp +; AVX_X86-NEXT: popl %ebp +; AVX_X86-NEXT: retl entry: %0 = load i32, i32* %a %1 = sitofp i32 %0 to double @@ -44,6 +113,27 @@ ; AVX: # BB#0: # %entry ; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_float_rr: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %eax +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2_X86-NEXT: cvtsi2ssl %eax, %xmm0 +; SSE2_X86-NEXT: movss %xmm0, (%esp) +; SSE2_X86-NEXT: flds (%esp) +; SSE2_X86-NEXT: popl %eax +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_float_rr: +; 
AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovss %xmm0, (%esp) +; AVX_X86-NEXT: flds (%esp) +; AVX_X86-NEXT: popl %eax +; AVX_X86-NEXT: retl entry: %0 = sitofp i32 %a to float ret float %0 @@ -59,6 +149,28 @@ ; AVX: # BB#0: # %entry ; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_float_rm: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %eax +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2_X86-NEXT: cvtsi2ssl (%eax), %xmm0 +; SSE2_X86-NEXT: movss %xmm0, (%esp) +; SSE2_X86-NEXT: flds (%esp) +; SSE2_X86-NEXT: popl %eax +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_float_rm: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovss %xmm0, (%esp) +; AVX_X86-NEXT: flds (%esp) +; AVX_X86-NEXT: popl %eax +; AVX_X86-NEXT: retl entry: %0 = load i32, i32* %a %1 = sitofp i32 %0 to float