; blob: ca014b6dc095327cc124b6bdcf7166981302937d [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
; Packed double-precision add. The body performs one register-register fadd
; and one fadd whose second operand is a 16-byte aligned load from %a2; the
; expected output below shows that load folded into the (%rdi) operand form.
define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_addpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addpd:
; ATOM: # %bb.0:
; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: addpd (%rdi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_addpd:
; SLM: # %bb.0:
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_addpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_addpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_addpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_addpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_addpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_addpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_addpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_addpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_addpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_addpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_addpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_addpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-register form.
  %sum.rr = fadd <2 x double> %a0, %a1
  ; Register-memory form, fed by the aligned vector load.
  %mem = load <2 x double>, <2 x double>* %a2, align 16
  %sum.rm = fadd <2 x double> %sum.rr, %mem
  ret <2 x double> %sum.rm
}
; Scalar double-precision add. The body performs one register-register fadd
; and one fadd whose second operand is an 8-byte aligned load from %a2; the
; expected output below shows that load folded into the (%rdi) operand form.
define double @test_addsd(double %a0, double %a1, double *%a2) {
; GENERIC-LABEL: test_addsd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addsd:
; ATOM: # %bb.0:
; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: addsd (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_addsd:
; SLM: # %bb.0:
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_addsd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_addsd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_addsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_addsd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_addsd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_addsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsd:
; SKX: # %bb.0:
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_addsd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_addsd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_addsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_addsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_addsd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_addsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-register form.
  %sum.rr = fadd double %a0, %a1
  ; Register-memory form, fed by the scalar load.
  %mem = load double, double* %a2, align 8
  %sum.rm = fadd double %sum.rr, %mem
  ret double %sum.rm
}
; Packed bitwise AND on double vectors. The operands are bitcast to <4 x i32>,
; ANDed once register-register and once against a 16-byte aligned load from
; %a2, then bitcast back and added to %a1.
define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_andpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_andpd:
; ATOM: # %bb.0:
; ATOM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: andpd (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_andpd:
; SLM: # %bb.0:
; SLM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: andpd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_andpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_andpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_andpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_andpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_andpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_andpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_andpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_andpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_andpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_andpd:
; SKX: # %bb.0:
; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_andpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_andpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
; BDVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_andpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_andpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_andpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_andpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Reinterpret both register operands as integer lanes for the bitwise op.
  %lhs.bits = bitcast <2 x double> %a0 to <4 x i32>
  %rhs.bits = bitcast <2 x double> %a1 to <4 x i32>
  ; Register-register form.
  %and.rr = and <4 x i32> %lhs.bits, %rhs.bits
  ; Register-memory form, fed by the aligned vector load.
  %mem = load <2 x double>, <2 x double>* %a2, align 16
  %mem.bits = bitcast <2 x double> %mem to <4 x i32>
  %and.rm = and <4 x i32> %and.rr, %mem.bits
  %and.fp = bitcast <4 x i32> %and.rm to <2 x double>
  ; NOTE(review): the trailing fadd presumably pins the result to the
  ; double-precision domain so the pd form of the AND is selected - confirm.
  %res = fadd <2 x double> %a1, %and.fp
  ret <2 x double> %res
}
; Packed bitwise AND-NOT on double vectors. On <4 x i32> bitcasts the body
; computes (~%a0 & %a1), then ANDs the complement of that value with a
; 16-byte aligned load from %a2, bitcasts back and adds the result to %a1.
define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_andnotpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_andnotpd:
; ATOM: # %bb.0:
; ATOM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: andnpd (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_andnotpd:
; SLM: # %bb.0:
; SLM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: andnpd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_andnotpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_andnotpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_andnotpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_andnotpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_andnotpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_andnotpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_andnotpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_andnotpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_andnotpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_andnotpd:
; SKX: # %bb.0:
; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_andnotpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_andnotpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
; BDVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_andnotpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_andnotpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_andnotpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_andnotpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Reinterpret both register operands as integer lanes for the bitwise ops.
  %a0.bits = bitcast <2 x double> %a0 to <4 x i32>
  %a1.bits = bitcast <2 x double> %a1 to <4 x i32>
  ; Register-register form, ~a0 & a1 (xor with all-ones is the complement).
  %not.a0 = xor <4 x i32> %a0.bits, <i32 -1, i32 -1, i32 -1, i32 -1>
  %andn.rr = and <4 x i32> %not.a0, %a1.bits
  ; Register-memory form, ~(previous result) & loaded value.
  %mem = load <2 x double>, <2 x double>* %a2, align 16
  %mem.bits = bitcast <2 x double> %mem to <4 x i32>
  %not.andn = xor <4 x i32> %andn.rr, <i32 -1, i32 -1, i32 -1, i32 -1>
  %andn.rm = and <4 x i32> %mem.bits, %not.andn
  %andn.fp = bitcast <4 x i32> %andn.rm to <2 x double>
  ; NOTE(review): the trailing fadd presumably pins the result to the
  ; double-precision domain so the pd form of the AND-NOT is selected - confirm.
  %res = fadd <2 x double> %a1, %andn.fp
  ret <2 x double> %res
}
; Cache-line flush of the address in %p via the llvm.x86.sse2.clflush
; intrinsic. The Atom expectations additionally list six nop instructions
; between the clflush and the return.
define void @test_clflush(i8* %p) {
; GENERIC-LABEL: test_clflush:
; GENERIC: # %bb.0:
; GENERIC-NEXT: clflush (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_clflush:
; ATOM: # %bb.0:
; ATOM-NEXT: clflush (%rdi) # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_clflush:
; SLM: # %bb.0:
; SLM-NEXT: clflush (%rdi) # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_clflush:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_clflush:
; SANDY: # %bb.0:
; SANDY-NEXT: clflush (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_clflush:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_clflush:
; HASWELL: # %bb.0:
; HASWELL-NEXT: clflush (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_clflush:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_clflush:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: clflush (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_clflush:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_clflush:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_clflush:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_clflush:
; SKX: # %bb.0:
; SKX-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_clflush:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: clflush (%rdi) # sched: [5:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_clflush:
; BDVER2: # %bb.0:
; BDVER2-NEXT: clflush (%rdi) # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_clflush:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_clflush:
; BTVER2: # %bb.0:
; BTVER2-NEXT: clflush (%rdi) # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_clflush:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: clflush (%rdi) # sched: [8:0.50]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_clflush:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: clflush (%rdi) # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; The intrinsic call is the only operation under test.
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
; Intrinsic used by test_clflush above.
declare void @llvm.x86.sse2.clflush(i8*) nounwind
; Packed double-precision ordered-equal compare. %a0 is compared against %a1
; and against a 16-byte aligned load from %a2; the two <2 x i1> results are
; ORed, sign-extended to <2 x i64> and returned bitcast to <2 x double>.
define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_cmppd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cmppd:
; ATOM: # %bb.0:
; ATOM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [6:3.00]
; ATOM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmppd:
; SLM: # %bb.0:
; SLM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cmppd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cmppd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cmppd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cmppd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cmppd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cmppd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cmppd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cmppd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cmppd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cmppd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
; BDVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cmppd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
; BDVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cmppd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cmppd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cmppd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Register-register compare.
  %eq.rr = fcmp oeq <2 x double> %a0, %a1
  ; Register-memory compare against the aligned vector load.
  %mem = load <2 x double>, <2 x double>* %a2, align 16
  %eq.rm = fcmp oeq <2 x double> %a0, %mem
  ; Merge both lane masks and widen them to all-ones / all-zeros 64-bit lanes.
  %eq.any = or <2 x i1> %eq.rr, %eq.rm
  %mask = sext <2 x i1> %eq.any to <2 x i64>
  %mask.fp = bitcast <2 x i64> %mask to <2 x double>
  ret <2 x double> %mask.fp
}
; test_cmpsd: places %a0, %a1 and a double loaded from %a2 into lane 0 of
; <2 x double> vectors and chains two calls to @llvm.x86.sse2.cmp.sd, both
; with predicate immediate 0. The autogenerated assertions below expect a
; register-form and a memory-form cmpeqsd for every CPU prefix (vcmpeqsd on
; the prefixes without the -SSE suffix).
; The trailing "# sched: [a:b]" text is the scheduling annotation printed
; because the run lines pass -print-schedule; presumably
; [latency:reciprocal throughput] -- TODO confirm.
define double @test_cmpsd(double %a0, double %a1, double *%a2) {
; GENERIC-LABEL: test_cmpsd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cmpsd:
; ATOM: # %bb.0:
; ATOM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmpsd:
; SLM: # %bb.0:
; SLM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cmpsd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cmpsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cmpsd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cmpsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cmpsd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cmpsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cmpsd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cmpsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cmpsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpsd:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cmpsd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
; BDVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cmpsd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cmpsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
; BTVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cmpsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cmpsd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cmpsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Only lane 0 of each operand vector is populated; the other lane is undef.
%1 = insertelement <2 x double> undef, double %a0, i32 0
%2 = insertelement <2 x double> undef, double %a1, i32 0
; Register/register compare; the i8 0 immediate is the predicate, which the
; assertions above show printed as the "eq" mnemonic.
%3 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %2, i8 0)
; The second compare takes its right-hand operand from memory; the assertions
; expect this load to be folded into the (%rdi) form of the instruction.
%4 = load double, double *%a2, align 8
%5 = insertelement <2 x double> undef, double %4, i32 0
%6 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %3, <2 x double> %5, i8 0)
; Return lane 0 of the second compare's result.
%7 = extractelement <2 x double> %6, i32 0
ret double %7
}
; Declaration of the scalar-double compare intrinsic called by test_cmpsd.
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
; test_comisd: calls @llvm.x86.sse2.comieq.sd twice with %a0 as the first
; operand -- once against %a1 and once against a vector loaded from %a2 --
; and ORs the two i32 results. The autogenerated assertions below expect each
; call to become comisd (vcomisd on the prefixes without the -SSE suffix)
; followed by setnp/sete/andb, with orb + movzbl forming the returned value.
; The trailing "# sched: [a:b]" text is the scheduling annotation printed
; because the run lines pass -print-schedule; presumably
; [latency:reciprocal throughput] -- TODO confirm.
define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_comisd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
; GENERIC-NEXT: setnp %al # sched: [1:0.50]
; GENERIC-NEXT: sete %cl # sched: [1:0.50]
; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
; GENERIC-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: setnp %al # sched: [1:0.50]
; GENERIC-NEXT: sete %dl # sched: [1:0.50]
; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_comisd:
; ATOM: # %bb.0:
; ATOM-NEXT: comisd %xmm1, %xmm0 # sched: [9:4.50]
; ATOM-NEXT: setnp %al # sched: [1:0.50]
; ATOM-NEXT: sete %cl # sched: [1:0.50]
; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
; ATOM-NEXT: comisd (%rdi), %xmm0 # sched: [10:5.00]
; ATOM-NEXT: setnp %al # sched: [1:0.50]
; ATOM-NEXT: sete %dl # sched: [1:0.50]
; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_comisd:
; SLM: # %bb.0:
; SLM-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: setnp %al # sched: [1:0.50]
; SLM-NEXT: sete %cl # sched: [1:0.50]
; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
; SLM-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: setnp %al # sched: [1:0.50]
; SLM-NEXT: sete %dl # sched: [1:0.50]
; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_comisd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
; SANDY-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_comisd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.50]
; SANDY-NEXT: sete %cl # sched: [1:0.50]
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.50]
; SANDY-NEXT: sete %dl # sched: [1:0.50]
; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_comisd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_comisd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: setnp %al # sched: [1:0.50]
; HASWELL-NEXT: sete %cl # sched: [1:0.50]
; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
; HASWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: setnp %al # sched: [1:0.50]
; HASWELL-NEXT: sete %dl # sched: [1:0.50]
; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_comisd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_comisd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_comisd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00]
; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_comisd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
; SKYLAKE-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_comisd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00]
; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_comisd:
; SKX: # %bb.0:
; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: setnp %al # sched: [1:0.50]
; SKX-NEXT: sete %cl # sched: [1:0.50]
; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
; SKX-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00]
; SKX-NEXT: setnp %al # sched: [1:0.50]
; SKX-NEXT: sete %dl # sched: [1:0.50]
; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_comisd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
; BDVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00]
; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_comisd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-NEXT: sete %cl # sched: [1:0.50]
; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50]
; BDVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [6:1.00]
; BDVER2-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-NEXT: sete %dl # sched: [1:0.50]
; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50]
; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_comisd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_comisd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: setnp %al # sched: [1:0.50]
; BTVER2-NEXT: sete %cl # sched: [1:0.50]
; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
; BTVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: setnp %al # sched: [1:0.50]
; BTVER2-NEXT: sete %dl # sched: [1:0.50]
; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_comisd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_comisd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
; ZNVER1-NEXT: vcomisd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register/register comparison.
%1 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
; Memory-operand comparison; the assertions expect the load to be folded into
; the (%rdi) form of the instruction.
; NOTE(review): this load uses align 8, while the other <2 x double> loads
; visible nearby in this file use align 16 -- confirm this is intentional.
%2 = load <2 x double>, <2 x double> *%a2, align 8
%3 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %2)
; Combine both results so neither comparison is dead.
%4 = or i32 %1, %3
ret i32 %4
}
; Declaration of the scalar-double "comieq" intrinsic called by test_comisd.
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
; test_cvtdq2pd: converts the low two i32 lanes of %a0, and of a <4 x i32>
; loaded from %a1, to <2 x double> with sitofp and adds the two results.
; The autogenerated assertions below expect a register-form and a memory-form
; cvtdq2pd (vcvtdq2pd on the prefixes without the -SSE suffix) followed by
; addpd/vaddpd; a few prefixes additionally show a movapd copy into %xmm0.
; The trailing "# sched: [a:b]" text is the scheduling annotation printed
; because the run lines pass -print-schedule; presumably
; [latency:reciprocal throughput] -- TODO confirm.
define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; GENERIC-LABEL: test_cvtdq2pd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
; GENERIC-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtdq2pd:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [8:4.00]
; ATOM-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [7:3.50]
; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtdq2pd:
; SLM: # %bb.0:
; SLM-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtdq2pd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtdq2pd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtdq2pd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtdq2pd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtdq2pd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtdq2pd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtdq2pd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtdq2pd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtdq2pd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtdq2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtdq2pd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [8:1.00]
; BDVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtdq2pd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [13:1.00]
; BDVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtdq2pd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtdq2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtdq2pd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00]
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtdq2pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand: keep lanes 0 and 1 of %a0, then convert to double.
%1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%2 = sitofp <2 x i32> %1 to <2 x double>
; Memory operand: same lane selection and conversion on the loaded vector.
%3 = load <4 x i32>, <4 x i32>*%a1, align 16
%4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%5 = sitofp <2 x i32> %4 to <2 x double>
; Add both conversions so neither is dead.
%6 = fadd <2 x double> %2, %5
ret <2 x double> %6
}
; test_cvtdq2ps: converts %a0 and a <4 x i32> loaded from %a1 to <4 x float>
; with sitofp and adds the two results. The autogenerated assertions below
; expect a register-form and a memory-form cvtdq2ps (vcvtdq2ps on the
; prefixes without the -SSE suffix) followed by addps/vaddps; the ATOM output
; additionally shows a movaps copy into %xmm0.
; The trailing "# sched: [a:b]" text is the scheduling annotation printed
; because the run lines pass -print-schedule; presumably
; [latency:reciprocal throughput] -- TODO confirm.
define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
; GENERIC-LABEL: test_cvtdq2ps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtdq2ps:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtdq2ps (%rdi), %xmm1 # sched: [7:3.50]
; ATOM-NEXT: cvtdq2ps %xmm0, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtdq2ps:
; SLM: # %bb.0:
; SLM-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtdq2ps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtdq2ps:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtdq2ps:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtdq2ps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtdq2ps:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtdq2ps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtdq2ps:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtdq2ps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtdq2ps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtdq2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtdq2ps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:1.00]
; BDVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtdq2ps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtdq2ps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtdq2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtdq2ps:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00]
; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00]
; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtdq2ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand conversion.
%1 = sitofp <4 x i32> %a0 to <4 x float>
; Memory operand conversion; the assertions expect the load to be folded into
; the (%rdi) form of the instruction.
%2 = load <4 x i32>, <4 x i32>*%a1, align 16
%3 = sitofp <4 x i32> %2 to <4 x float>
; Add both conversions so neither is dead.
%4 = fadd <4 x float> %1, %3
ret <4 x float> %4
}
; test_cvtpd2dq: calls @llvm.x86.sse2.cvtpd2dq on %a0 and on a <2 x double>
; loaded from %a1, then adds the two <4 x i32> results. The autogenerated
; assertions below expect a register-form and a memory-form cvtpd2dq; on the
; prefixes without the -SSE suffix these appear as vcvtpd2dq and, for the
; memory form, vcvtpd2dqx. A couple of prefixes additionally show a movdqa
; copy into %xmm0.
; The trailing "# sched: [a:b]" text is the scheduling annotation printed
; because the run lines pass -print-schedule; presumably
; [latency:reciprocal throughput] -- TODO confirm.
define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; GENERIC-LABEL: test_cvtpd2dq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
; GENERIC-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtpd2dq:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [8:4.00]
; ATOM-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [7:3.50]
; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtpd2dq:
; SLM: # %bb.0:
; SLM-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtpd2dq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtpd2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtpd2dq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtpd2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtpd2dq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00]
; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtpd2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtpd2dq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtpd2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtpd2dq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtpd2dq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [8:1.00]
; BDVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtpd2dq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [13:1.00]
; BDVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtpd2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtpd2dq:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00]
; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtpd2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand conversion.
%1 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
; Memory operand conversion; the assertions expect the load to be folded into
; the (%rdi) form of the instruction.
%2 = load <2 x double>, <2 x double> *%a1, align 16
%3 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %2)
; Add both conversions so neither is dead.
%4 = add <4 x i32> %1, %3
ret <4 x i32> %4
}
; Declaration of the packed double -> i32 conversion intrinsic called by
; test_cvtpd2dq.
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
; Scheduling test for (v)cvtpd2ps.
; %a0 is converted directly, and a second <2 x double> loaded from %a1 is
; converted as well, so the expected output below contains both a
; register-source and a memory-source form of the instruction. The two
; <4 x float> results are summed with fadd and returned.
; NOTE(review): the "# sched" annotations are produced by -print-schedule and
; presumably read as [latency:reciprocal throughput] - confirm against llc docs.
; The per-CPU expectation blocks are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; GENERIC-LABEL: test_cvtpd2ps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
; GENERIC-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtpd2ps:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [8:4.00]
; ATOM-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [7:3.50]
; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtpd2ps:
; SLM: # %bb.0:
; SLM-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtpd2ps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtpd2ps:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtpd2ps:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtpd2ps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtpd2ps:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00]
; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtpd2ps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtpd2ps:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtpd2ps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtpd2ps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtpd2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtpd2ps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [8:1.00]
; BDVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtpd2ps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [13:1.00]
; BDVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtpd2ps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtpd2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtpd2ps:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtpd2ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00]
; ZNVER1-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Convert the argument that arrives by value.
%1 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
; Convert a second operand loaded through the pointer argument.
%2 = load <2 x double>, <2 x double> *%a1, align 16
%3 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %2)
; Both conversion results feed the returned value.
%4 = fadd <4 x float> %1, %3
ret <4 x float> %4
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
; Scheduling test for (v)cvtps2dq.
; One <4 x float> comes in by value (%a0) and one is loaded from %a1; each is
; passed to the llvm.x86.sse2.cvtps2dq intrinsic, so the expected output below
; has a register-source and a memory-source conversion. The two <4 x i32>
; results are combined with an integer add and returned.
; NOTE(review): the "# sched" annotations are produced by -print-schedule and
; presumably read as [latency:reciprocal throughput] - confirm against llc docs.
; The per-CPU expectation blocks are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_cvtps2dq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtps2dq:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtps2dq (%rdi), %xmm1 # sched: [7:3.50]
; ATOM-NEXT: cvtps2dq %xmm0, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtps2dq:
; SLM: # %bb.0:
; SLM-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtps2dq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtps2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtps2dq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtps2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtps2dq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtps2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtps2dq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtps2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtps2dq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtps2dq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:1.00]
; BDVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtps2dq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtps2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtps2dq:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00]
; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [12:1.00]
; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtps2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Convert the argument that arrives by value.
%1 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
; Convert a second operand loaded through the pointer argument.
%2 = load <4 x float>, <4 x float> *%a1, align 16
%3 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %2)
; Both conversion results feed the returned value.
%4 = add <4 x i32> %1, %3
ret <4 x i32> %4
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
; Scheduling test for (v)cvtps2pd.
; No intrinsic is called here: the low two lanes of a <4 x float> are picked
; with shufflevector (indices 0 and 1) and widened with fpext, once for the
; by-value argument %a0 and once for a vector loaded from %a1. The expected
; output below shows a register-source and a memory-source cvtps2pd for that
; pattern. The two <2 x double> results are summed with fadd and returned.
; NOTE(review): the "# sched" annotations are produced by -print-schedule and
; presumably read as [latency:reciprocal throughput] - confirm against llc docs.
; The per-CPU expectation blocks are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_cvtps2pd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
; GENERIC-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtps2pd:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtps2pd (%rdi), %xmm1 # sched: [8:4.00]
; ATOM-NEXT: cvtps2pd %xmm0, %xmm0 # sched: [7:3.50]
; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtps2pd:
; SLM: # %bb.0:
; SLM-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtps2pd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtps2pd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtps2pd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtps2pd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtps2pd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtps2pd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtps2pd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtps2pd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtps2pd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtps2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtps2pd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [8:1.00]
; BDVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtps2pd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [13:1.00]
; BDVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtps2pd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtps2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtps2pd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtps2pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00]
; ZNVER1-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Low two float lanes of the by-value argument, widened to double.
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%2 = fpext <2 x float> %1 to <2 x double>
; Same extract-and-widen sequence applied to a vector loaded from memory.
%3 = load <4 x float>, <4 x float> *%a1, align 16
%4 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%5 = fpext <2 x float> %4 to <2 x double>
; Both widened results feed the returned value.
%6 = fadd <2 x double> %2, %5
ret <2 x double> %6
}
; Scheduling test for (v)cvtsd2si with a 32-bit integer result.
; The intrinsic takes a <2 x double>, so each scalar double is first placed in
; lane 0 of an otherwise undef vector. One input is the by-value argument %a0
; and the other is loaded from %a1, giving a register-source and a
; memory-source conversion in the expected output below. The two i32 results
; are added and returned.
; NOTE(review): the "# sched" annotations are produced by -print-schedule and
; presumably read as [latency:reciprocal throughput] - confirm against llc docs.
; The per-CPU expectation blocks are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @test_cvtsd2si(double %a0, double *%a1) {
; GENERIC-LABEL: test_cvtsd2si:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
; GENERIC-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtsd2si:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtsd2si (%rdi), %eax # sched: [9:4.50]
; ATOM-NEXT: cvtsd2si %xmm0, %ecx # sched: [8:4.00]
; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtsd2si:
; SLM: # %bb.0:
; SLM-NEXT: cvtsd2si (%rdi), %eax # sched: [7:1.00]
; SLM-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:0.50]
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtsd2si:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtsd2si:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [10:1.00]
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtsd2si:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtsd2si:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtsd2si:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00]
; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtsd2si:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00]
; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtsd2si:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00]
; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00]
; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtsd2si:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
; SKYLAKE-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtsd2si:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00]
; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00]
; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsd2si:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
; SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00]
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtsd2si:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [18:1.00]
; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [13:1.00]
; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtsd2si:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [18:1.00]
; BDVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [13:1.00]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsd2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00]
; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [7:1.00]
; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtsd2si:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00]
; BTVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [7:1.00]
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtsd2si:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00]
; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtsd2si:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00]
; ZNVER1-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Wrap the by-value scalar in lane 0 of a vector and convert it.
%1 = insertelement <2 x double> undef, double %a0, i32 0
%2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %1)
; Same wrap-and-convert sequence for a scalar loaded from memory.
%3 = load double, double *%a1, align 8
%4 = insertelement <2 x double> undef, double %3, i32 0
%5 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %4)
; Both conversion results feed the returned value.
%6 = add i32 %2, %5
ret i32 %6
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
; Scheduling test for (v)cvtsd2si with a 64-bit integer result.
; Mirrors test_cvtsd2si but calls llvm.x86.sse2.cvtsd2si64, so the expected
; output below targets 64-bit registers (%rcx/%rax) and ends in addq. Each
; scalar double is placed in lane 0 of an otherwise undef <2 x double> because
; the intrinsic takes a vector argument; one input is %a0 and the other is
; loaded from %a1. The two i64 results are added and returned.
; NOTE(review): the "# sched" annotations are produced by -print-schedule and
; presumably read as [latency:reciprocal throughput] - confirm against llc docs.
; The per-CPU expectation blocks are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @test_cvtsd2siq(double %a0, double *%a1) {
; GENERIC-LABEL: test_cvtsd2siq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
; GENERIC-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtsd2siq:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtsd2si (%rdi), %rax # sched: [9:4.50]
; ATOM-NEXT: cvtsd2si %xmm0, %rcx # sched: [8:4.00]
; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtsd2siq:
; SLM: # %bb.0:
; SLM-NEXT: cvtsd2si (%rdi), %rax # sched: [7:1.00]
; SLM-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:0.50]
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtsd2siq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtsd2siq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00]
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtsd2siq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtsd2siq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtsd2siq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00]
; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtsd2siq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00]
; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtsd2siq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00]
; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00]
; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtsd2siq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
; SKYLAKE-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtsd2siq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00]
; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00]
; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsd2siq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00]
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtsd2siq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [18:1.00]
; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [13:1.00]
; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtsd2siq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [18:1.00]
; BDVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [13:1.00]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsd2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00]
; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [7:1.00]
; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtsd2siq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00]
; BTVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [7:1.00]
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtsd2siq:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00]
; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtsd2siq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00]
; ZNVER1-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Wrap the by-value scalar in lane 0 of a vector and convert it to i64.
%1 = insertelement <2 x double> undef, double %a0, i32 0
%2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %1)
; Same wrap-and-convert sequence for a scalar loaded from memory.
%3 = load double, double *%a1, align 8
%4 = insertelement <2 x double> undef, double %3, i32 0
%5 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %4)
; Both conversion results feed the returned value.
%6 = add i64 %2, %5
ret i64 %6
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
; Scheduling test for (v)cvtsd2ss.
; No intrinsic is called here: both conversions are plain fptrunc from double
; to float, one on the by-value argument %a0 and one on a double loaded from
; %a1. In every expectation block below the load appears as a separate
; (v)movsd followed by a register-form (v)cvtsd2ss, rather than as a
; conversion with a memory operand. The Atom block additionally expects an
; xorps of the destination register before the second conversion. The two
; float results are summed with fadd and returned.
; NOTE(review): the "# sched" annotations are produced by -print-schedule and
; presumably read as [latency:reciprocal throughput] - confirm against llc docs.
; The per-CPU expectation blocks are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define float @test_cvtsd2ss(double %a0, double *%a1) {
; GENERIC-LABEL: test_cvtsd2ss:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtsd2ss:
; ATOM: # %bb.0:
; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
; ATOM-NEXT: cvtsd2ss %xmm0, %xmm2 # sched: [6:3.00]
; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: cvtsd2ss %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: addss %xmm2, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtsd2ss:
; SLM: # %bb.0:
; SLM-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
; SLM-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:0.50]
; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtsd2ss:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtsd2ss:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtsd2ss:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtsd2ss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtsd2ss:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtsd2ss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
; BROADWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtsd2ss:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtsd2ss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
; SKYLAKE-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtsd2ss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsd2ss:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtsd2ss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtsd2ss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
; BDVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsd2ss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00]
; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtsd2ss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
; BTVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [7:2.00]
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtsd2ss:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtsd2ss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
; ZNVER1-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; Narrow the by-value double to float.
%1 = fptrunc double %a0 to float
; Narrow a second double loaded through the pointer argument.
%2 = load double, double *%a1, align 8
%3 = fptrunc double %2 to float
; Both narrowed results feed the returned value.
%4 = fadd float %1, %3
ret float %4
}
; Scheduling test for the scalar signed i32 -> double conversion (cvtsi2sdl).
; %a0 is converted straight from its argument register and the i32 loaded from
; %a1 is converted as a memory operand, so the expected output contains both the
; register and the memory form of the instruction (v-prefixed in the runs that
; emit VEX encodings). The two doubles are then combined with one fadd.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; GENERIC-LABEL: test_cvtsi2sd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
; GENERIC-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtsi2sd:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [6:3.00]
; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtsi2sd:
; SLM: # %bb.0:
; SLM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:0.50]
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtsi2sd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtsi2sd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtsi2sd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtsi2sd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtsi2sd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtsi2sd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtsi2sd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtsi2sd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtsi2sd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2sd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtsi2sd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtsi2sd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2sd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [14:1.00]
; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [9:1.00]
; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtsi2sd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [9:1.00]
; BTVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [14:1.00]
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtsi2sd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00]
; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtsi2sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 is the register-source conversion; %2/%3 form the memory-source one.
%1 = sitofp i32 %a0 to double
%2 = load i32, i32 *%a1, align 8
%3 = sitofp i32 %2 to double
%4 = fadd double %1, %3
ret double %4
}
; Scheduling test for the scalar signed i64 -> double conversion (cvtsi2sdq).
; %a0 is converted straight from its argument register and the i64 loaded from
; %a1 is converted as a memory operand, so the expected output contains both the
; register and the memory form of the instruction (v-prefixed in the runs that
; emit VEX encodings). The two doubles are then combined with one fadd.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; GENERIC-LABEL: test_cvtsi2sdq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
; GENERIC-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtsi2sdq:
; ATOM: # %bb.0:
; ATOM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [6:3.00]
; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtsi2sdq:
; SLM: # %bb.0:
; SLM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:0.50]
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtsi2sdq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtsi2sdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtsi2sdq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtsi2sdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtsi2sdq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtsi2sdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtsi2sdq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtsi2sdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtsi2sdq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2sdq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtsi2sdq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [13:1.00]
; BDVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtsi2sdq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2sdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [14:1.00]
; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [9:1.00]
; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtsi2sdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [9:1.00]
; BTVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [14:1.00]
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtsi2sdq:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00]
; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtsi2sdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 is the register-source conversion; %2/%3 form the memory-source one.
%1 = sitofp i64 %a0 to double
%2 = load i64, i64 *%a1, align 8
%3 = sitofp i64 %2 to double
%4 = fadd double %1, %3
ret double %4
}
; TODO - cvtss2sd_m
; NOTE(review): the TODO above presumably refers to the memory-operand form of
; cvtss2sd. Every expected sequence below loads the float with a separate
; movss/vmovss and then uses the register form, so the folded-load form is not
; covered by this test - confirm before relying on it.
;
; Scheduling test for the scalar float -> double extension (cvtss2sd).
; %a0 is extended from its argument register, the float loaded from %a1 is
; extended as well, and the two doubles are combined with one fadd.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
define double @test_cvtss2sd(float %a0, float *%a1) {
; GENERIC-LABEL: test_cvtss2sd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; GENERIC-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtss2sd:
; ATOM: # %bb.0:
; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
; ATOM-NEXT: cvtss2sd %xmm0, %xmm2 # sched: [6:3.00]
; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: cvtss2sd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: addsd %xmm2, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtss2sd:
; SLM: # %bb.0:
; SLM-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
; SLM-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:0.50]
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvtss2sd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvtss2sd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvtss2sd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvtss2sd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; HASWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvtss2sd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtss2sd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; BROADWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvtss2sd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtss2sd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKYLAKE-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvtss2sd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtss2sd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtss2sd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtss2sd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; BDVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvtss2sd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [7:2.00]
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtss2sd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
; BTVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [7:2.00]
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvtss2sd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvtss2sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
; ZNVER1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 extends the register argument; %2/%3 load a float from %a1 and extend it.
%1 = fpext float %a0 to double
%2 = load float, float *%a1, align 4
%3 = fpext float %2 to double
%4 = fadd double %1, %3
ret double %4
}
; Scheduling test for the truncating packed double -> i32 conversion
; (cvttpd2dq). %a0 is converted from its argument register and the
; <2 x double> loaded from %a1 (16-byte aligned) is converted as a memory
; operand; the v-prefixed memory form is printed as vcvttpd2dqx in the expected
; output. Each <2 x i32> result is widened to <4 x i32> by a shufflevector
; whose indices 2 and 3 select from the undef operand, and the two widened
; vectors are combined with one integer add.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; GENERIC-LABEL: test_cvttpd2dq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
; GENERIC-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvttpd2dq:
; ATOM: # %bb.0:
; ATOM-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [8:4.00]
; ATOM-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [7:3.50]
; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvttpd2dq:
; SLM: # %bb.0:
; SLM-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvttpd2dq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvttpd2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvttpd2dq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvttpd2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvttpd2dq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00]
; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvttpd2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvttpd2dq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvttpd2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvttpd2dq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvttpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvttpd2dq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [8:1.00]
; BDVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvttpd2dq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [13:1.00]
; BDVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvttpd2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvttpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvttpd2dq:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00]
; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvttpd2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1/%2 convert and widen the register argument; %3..%5 do the same for the
; vector loaded from %a1.
%1 = fptosi <2 x double> %a0 to <2 x i32>
%2 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = load <2 x double>, <2 x double> *%a1, align 16
%4 = fptosi <2 x double> %3 to <2 x i32>
%5 = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = add <4 x i32> %2, %5
ret <4 x i32> %6
}
; Scheduling test for the truncating packed float -> i32 conversion
; (cvttps2dq). %a0 is converted from its argument register and the
; <4 x float> loaded from %a1 (16-byte aligned) is converted as a memory
; operand, so the expected output contains both the register and the memory
; form of the instruction (v-prefixed in the runs that emit VEX encodings).
; The two <4 x i32> results are combined with one integer add.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_cvttps2dq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvttps2dq:
; ATOM: # %bb.0:
; ATOM-NEXT: cvttps2dq (%rdi), %xmm1 # sched: [7:3.50]
; ATOM-NEXT: cvttps2dq %xmm0, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvttps2dq:
; SLM: # %bb.0:
; SLM-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvttps2dq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvttps2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvttps2dq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvttps2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvttps2dq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvttps2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvttps2dq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvttps2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvttps2dq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvttps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvttps2dq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:1.00]
; BDVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvttps2dq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:1.00]
; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-SSE-LABEL: test_cvttps2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvttps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cvttps2dq:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00]
; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00]
; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cvttps2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 is the register-source conversion; %2/%3 form the memory-source one.
%1 = fptosi <4 x float> %a0 to <4 x i32>
%2 = load <4 x float>, <4 x float> *%a1, align 16
%3 = fptosi <4 x float> %2 to <4 x i32>
%4 = add <4 x i32> %1, %3
ret <4 x i32> %4
}
define i32 @test_cvttsd2si(double %a0, double *%a1) {
; GENERIC-LABEL: test_cvttsd2si:
; GENERIC: # %bb.0:
; GENERIC-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
; GENERIC-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvttsd2si:
; ATOM: # %bb.0:
; ATOM-NEXT: cvttsd2si (%rdi), %eax # sched: [9:4.50]
; ATOM-NEXT: cvttsd2si %xmm0, %ecx # sched: [8:4.00]
; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvttsd2si:
; SLM: # %bb.0:
; SLM-NEXT: cvttsd2si (%rdi), %eax # sched: [7:1.00]
; SLM-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:0.50]
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cvttsd2si:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cvttsd2si:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [10:1.00]
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cvttsd2si:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cvttsd2si:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cvttsd2si:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00]
; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvttsd2si:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00]
; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cvttsd2si:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00]
; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00]
; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvttsd2si:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
; SKYLAKE-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cvttsd2si:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00]
; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00]
; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]