; blob: 92516b78760e27e438bf8de847e7cde5bc2372db [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
; Lane-wise add of sixteen i8 elements: both <2 x i64> arguments are
; reinterpreted as <16 x i8>, added, and the sum is bitcast back to <2 x i64>.
; The checks expect a single paddb (vpaddb on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi8:
; SSE: # %bb.0:
; SSE-NEXT: paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = add <16 x i8> %arg0, %arg1
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; Lane-wise add of eight i16 elements: both <2 x i64> arguments are
; reinterpreted as <8 x i16>, added, and the sum is bitcast back to <2 x i64>.
; The checks expect a single paddw (vpaddw on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi16:
; SSE: # %bb.0:
; SSE-NEXT: paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = add <8 x i16> %arg0, %arg1
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
; Lane-wise add of four i32 elements: both <2 x i64> arguments are
; reinterpreted as <4 x i32>, added, and the sum is bitcast back to <2 x i64>.
; The checks expect a single paddd (vpaddd on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi32:
; SSE: # %bb.0:
; SSE-NEXT: paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = add <4 x i32> %arg0, %arg1
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
; Lane-wise add of the two i64 elements; no bitcasts are needed because the
; arguments already have the <2 x i64> type.
; The checks expect a single paddq (vpaddq on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi64:
; SSE: # %bb.0:
; SSE-NEXT: paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = add <2 x i64> %a0, %a1
ret <2 x i64> %res
}
; Packed double-precision add: a plain vector fadd of the two <2 x double>
; arguments.
; The checks expect a single addpd (vaddpd on the AVX1 and AVX512 runs).
define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_pd:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = fadd <2 x double> %a0, %a1
ret <2 x double> %res
}
; Scalar double-precision add on element 0 only: element 0 of each argument
; is extracted and added, and the sum is inserted back into element 0 of %a0,
; so element 1 of the result is element 1 of %a0.
; The checks expect this whole pattern to fold into one addsd / vaddsd.
define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext0 = extractelement <2 x double> %a0, i32 0
%ext1 = extractelement <2 x double> %a1, i32 0
%fadd = fadd double %ext0, %ext1
%res = insertelement <2 x double> %a0, double %fadd, i32 0
ret <2 x double> %res
}
; Signed saturating add of sixteen i8 elements, expressed through the generic
; llvm.sadd.sat.v16i8 intrinsic on the bitcast arguments.
; The checks expect a single paddsb (vpaddsb on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi8:
; SSE: # %bb.0:
; SSE-NEXT: paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; Signed saturating-add intrinsic used by the test above.
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
; Signed saturating add of eight i16 elements, expressed through the generic
; llvm.sadd.sat.v8i16 intrinsic on the bitcast arguments.
; The checks expect a single paddsw (vpaddsw on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi16:
; SSE: # %bb.0:
; SSE-NEXT: paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
; Signed saturating-add intrinsic used by the test above.
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
; Unsigned saturating add of sixteen i8 elements, expressed through the
; generic llvm.uadd.sat.v16i8 intrinsic on the bitcast arguments.
; The checks expect a single paddusb (vpaddusb on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu8:
; SSE: # %bb.0:
; SSE-NEXT: paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; Unsigned saturating-add intrinsic used by the test above.
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
; Unsigned saturating add of eight i16 elements, expressed through the
; generic llvm.uadd.sat.v8i16 intrinsic on the bitcast arguments.
; The checks expect a single paddusw (vpaddusw on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu16:
; SSE: # %bb.0:
; SSE-NEXT: paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
; Unsigned saturating-add intrinsic used by the test above.
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
; Bitwise AND of two <2 x double> values: both are reinterpreted as <4 x i32>,
; ANDed, and the result is bitcast back to <2 x double>.
; The checks expect the single-precision form andps / vandps to be selected.
define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_and_pd:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x double> %a0 to <4 x i32>
%arg1 = bitcast <2 x double> %a1 to <4 x i32>
%res = and <4 x i32> %arg0, %arg1
%bc = bitcast <4 x i32> %res to <2 x double>
ret <2 x double> %bc
}
; Bitwise AND of two <2 x i64> values, written directly as a vector `and`.
; The checks expect andps / vandps, the same instruction as the
; floating-point variant of this test.
define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_and_si128:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = and <2 x i64> %a0, %a1
ret <2 x i64> %res
}
; Computes (~%a0) & %a1 on <2 x double> values via <4 x i32> bitcasts; the
; NOT is written as an xor with an all-ones vector.
; The checks pin a separate NOT followed by pand: the SSE and AVX1 runs
; materialise all-ones with pcmpeqd and xor with it, while the AVX512 runs
; perform the NOT with vpternlogq $15.
define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_pd:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x double> %a0 to <4 x i32>
%arg1 = bitcast <2 x double> %a1 to <4 x i32>
%not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
%res = and <4 x i32> %not, %arg1
%bc = bitcast <4 x i32> %res to <2 x double>
ret <2 x double> %bc
}
; Computes (~%a0) & %a1 directly on <2 x i64>; the NOT is written as an xor
; with an all-ones vector.
; The checks pin the same instruction sequences as the <2 x double> variant:
; pcmpeqd + pxor + pand (SSE, AVX1) or vpternlogq $15 + vpand (AVX512).
define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_si128:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%not = xor <2 x i64> %a0, <i64 -1, i64 -1>
%res = and <2 x i64> %not, %a1
ret <2 x i64> %res
}
; Calls the target-specific llvm.x86.sse2.pavg.b intrinsic on the arguments
; reinterpreted as <16 x i8>, then bitcasts the result back to <2 x i64>.
; The checks expect a single pavgb (vpavgb on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; Byte-averaging intrinsic used by the test above.
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone
; Calls the target-specific llvm.x86.sse2.pavg.w intrinsic on the arguments
; reinterpreted as <8 x i16>, then bitcasts the result back to <2 x i64>.
; The checks expect a single pavgw (vpavgw on the AVX1 and AVX512 runs).
define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu16:
; SSE: # %bb.0:
; SSE-NEXT: pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
; Word-averaging intrinsic used by the test above.
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
; Whole-register byte shift left by 5, written as a shufflevector of
; (zeroinitializer, %arg0): mask indices 11..15 pick five zero bytes and
; indices 16..26 pick bytes 0..10 of %arg0.
; The checks expect the shuffle to be matched as pslldq $5 / vpslldq $5.
define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bslli_si128:
; SSE: # %bb.0:
; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bslli_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bslli_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; Whole-register byte shift right by 5, written as a shufflevector of
; (%arg0, zeroinitializer): mask indices 5..15 pick bytes 5..15 of %arg0 and
; indices 16..20 pick five zero bytes.
; The checks expect the shuffle to be matched as psrldq $5 / vpsrldq $5.
define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bsrli_si128:
; SSE: # %bb.0:
; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bsrli_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bsrli_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; Reinterprets <2 x double> as <4 x float>. The bitcast must be free: every
; run configuration expects nothing but the return instruction.
define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = bitcast <2 x double> %a0 to <4 x float>
ret <4 x float> %res
}
; Reinterprets <2 x double> as <2 x i64>. The bitcast must be free: every
; run configuration expects nothing but the return instruction.
define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = bitcast <2 x double> %a0 to <2 x i64>
ret <2 x i64> %res
}
; Reinterprets <4 x float> as <2 x double>. The bitcast must be free: every
; run configuration expects nothing but the return instruction.
define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = bitcast <4 x float> %a0 to <2 x double>
ret <2 x double> %res
}
; Reinterprets <4 x float> as <2 x i64>. The bitcast must be free: every
; run configuration expects nothing but the return instruction.
define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = bitcast <4 x float> %a0 to <2 x i64>
ret <2 x i64> %res
}
; Reinterprets <2 x i64> as <2 x double>. The bitcast must be free: every
; run configuration expects nothing but the return instruction.
define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = bitcast <2 x i64> %a0 to <2 x double>
ret <2 x double> %res
}
; Reinterprets <2 x i64> as <4 x float>. The bitcast must be free: every
; run configuration expects nothing but the return instruction.
define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = bitcast <2 x i64> %a0 to <4 x float>
ret <4 x float> %res
}
; Passes the pointer argument to the llvm.x86.sse2.clflush intrinsic.
; The i386 runs expect the pointer to be loaded from the stack into %eax
; first; the x86_64 runs expect clflush to use %rdi directly.
define void @test_mm_clflush(i8* %a0) nounwind {
; X86-LABEL: test_mm_clflush:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: clflush (%eax) # encoding: [0x0f,0xae,0x38]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_clflush:
; X64: # %bb.0:
; X64-NEXT: clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.sse2.clflush(i8* %a0)
ret void
}
; The call returns void and exists only for its side effect, so the
; declaration must not carry `readnone`: that attribute would describe the
; call as having no observable effect. Only `nounwind` is kept.
declare void @llvm.x86.sse2.clflush(i8*) nounwind
; Lane-wise equality compare of sixteen i8 elements; the <16 x i1> result is
; sign-extended so each lane becomes all-ones or all-zeros.
; The SSE and AVX1 runs expect a single pcmpeqb / vpcmpeqb. The AVX512 runs
; expect a compare into mask register %k0 followed by vpmovm2b to expand the
; mask back into a vector.
define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%cmp = icmp eq <16 x i8> %arg0, %arg1
%res = sext <16 x i1> %cmp to <16 x i8>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; Lane-wise equality compare of eight i16 elements; the <8 x i1> result is
; sign-extended so each lane becomes all-ones or all-zeros.
; The SSE and AVX1 runs expect a single pcmpeqw / vpcmpeqw. The AVX512 runs
; expect a compare into mask register %k0 followed by vpmovm2w.
define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%cmp = icmp eq <8 x i16> %arg0, %arg1
%res = sext <8 x i1> %cmp to <8 x i16>
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
; Lane-wise equality compare of four i32 elements; the <4 x i1> result is
; sign-extended so each lane becomes all-ones or all-zeros.
; The SSE and AVX1 runs expect a single pcmpeqd / vpcmpeqd. The AVX512 runs
; expect a compare into mask register %k0 followed by vpmovm2d.
define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%cmp = icmp eq <4 x i32> %arg0, %arg1
%res = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
; Packed ordered-equal compare of two <2 x double> values; the <2 x i1> result
; is sign-extended to <2 x i64> and bitcast to <2 x double>.
; The SSE and AVX1 runs expect a single cmpeqpd / vcmpeqpd. The AVX512 runs
; expect a compare into mask register %k0 followed by vpmovm2q.
define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp oeq <2 x double> %a0, %a1
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; Scalar compare through llvm.x86.sse2.cmp.sd with predicate immediate 0,
; which the checks show is emitted as cmpeqsd / vcmpeqsd.
; The AVX1 and AVX512 runs share one set of expectations under the AVX prefix.
define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
ret <2 x double> %res
}
; Scalar-double compare intrinsic; the i8 operand selects the predicate.
; It is also called by the other *_sd compare tests in this file.
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
; Packed %a0 >= %a1, written as `fcmp ole` with the operands swapped
; (%a1 <= %a0); the <2 x i1> result is sign-extended and bitcast to
; <2 x double>.
; Because the operands are swapped, the SSE runs expect cmplepd to write
; %xmm1 and then a movapd to copy the result into %xmm0.
define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp ole <2 x double> %a1, %a0
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; Scalar %a0 >= %a1, written as llvm.x86.sse2.cmp.sd(%a1, %a0, 2) with the
; operands swapped; the checks show predicate 2 is emitted as cmplesd.
; The result vector is rebuilt from element 0 of the compare and element 1 of
; %a0, which the checks expect to become a movsd (SSE) or vblendpd $1 (AVX).
define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
%ext0 = extractelement <2 x double> %cmp, i32 0
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
%ext1 = extractelement <2 x double> %a0, i32 1
%ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
ret <2 x double> %ins1
}
; Lane-wise signed greater-than compare of sixteen i8 elements (%a0 > %a1);
; the <16 x i1> result is sign-extended to all-ones / all-zeros lanes.
; The SSE and AVX1 runs expect a single pcmpgtb / vpcmpgtb. The AVX512 runs
; expect a compare into mask register %k0 followed by vpmovm2b.
define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%cmp = icmp sgt <16 x i8> %arg0, %arg1
%res = sext <16 x i1> %cmp to <16 x i8>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; Lane-wise signed greater-than compare of eight i16 elements (%a0 > %a1);
; the <8 x i1> result is sign-extended to all-ones / all-zeros lanes.
; The SSE and AVX1 runs expect a single pcmpgtw / vpcmpgtw. The AVX512 runs
; expect a compare into mask register %k0 followed by vpmovm2w.
define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%cmp = icmp sgt <8 x i16> %arg0, %arg1
%res = sext <8 x i1> %cmp to <8 x i16>
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
; Lane-wise signed greater-than compare of four i32 elements (%a0 > %a1);
; the <4 x i1> result is sign-extended to all-ones / all-zeros lanes.
; The SSE and AVX1 runs expect a single pcmpgtd / vpcmpgtd. The AVX512 runs
; expect a compare into mask register %k0 followed by vpmovm2d.
define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%cmp = icmp sgt <4 x i32> %arg0, %arg1
%res = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
; Packed %a0 > %a1, written as `fcmp olt` with the operands swapped
; (%a1 < %a0); the <2 x i1> result is sign-extended and bitcast to
; <2 x double>.
; Because the operands are swapped, the SSE runs expect cmpltpd to write
; %xmm1 and then a movapd to copy the result into %xmm0.
define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp olt <2 x double> %a1, %a0
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; Scalar %a0 > %a1, written as llvm.x86.sse2.cmp.sd(%a1, %a0, 1) with the
; operands swapped; the checks show predicate 1 is emitted as cmpltsd.
; The result vector is rebuilt from element 0 of the compare and element 1 of
; %a0, which the checks expect to become a movsd (SSE) or vblendpd $1 (AVX).
define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
%ext0 = extractelement <2 x double> %cmp, i32 0
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
%ext1 = extractelement <2 x double> %a0, i32 1
%ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
ret <2 x double> %ins1
}
; Packed ordered less-or-equal compare (%a0 <= %a1) with the operands in
; their natural order; the <2 x i1> result is sign-extended and bitcast to
; <2 x double>.
; The SSE and AVX1 runs expect a single cmplepd / vcmplepd. The AVX512 runs
; expect a compare into mask register %k0 followed by vpmovm2q.
define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp ole <2 x double> %a0, %a1
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; Scalar compare through llvm.x86.sse2.cmp.sd with predicate immediate 2 and
; the operands in their natural order; the checks show it is emitted as
; cmplesd / vcmplesd with no extra move or blend.
define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
ret <2 x double> %res
}
; Lane-wise signed %a0 < %a1 on sixteen i8 elements, written as `icmp sgt`
; with the operands swapped (%arg1 > %arg0); the result is sign-extended to
; all-ones / all-zeros lanes.
; Because the operands are swapped, the SSE runs expect pcmpgtb to write
; %xmm1 and then a movdqa to copy the result into %xmm0.
define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%cmp = icmp sgt <16 x i8> %arg1, %arg0
%res = sext <16 x i1> %cmp to <16 x i8>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
; _mm_cmplt_epi16 pattern - both operands are reinterpreted as <8 x i16> and
; compared with "icmp sgt %arg1, %arg0" (a0 < a1 written as a1 > a0), so the
; check lines expect pcmpgtw with the operands swapped. The i1 lanes are
; sign-extended to i16 and the result is bitcast back to <2 x i64>.
define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%cmp = icmp sgt <8 x i16> %arg1, %arg0
%res = sext <8 x i1> %cmp to <8 x i16>
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
; _mm_cmplt_epi32 pattern - both operands are reinterpreted as <4 x i32> and
; compared with "icmp sgt %arg1, %arg0" (a0 < a1 written as a1 > a0), so the
; check lines expect pcmpgtd with the operands swapped. The i1 lanes are
; sign-extended to i32 and the result is bitcast back to <2 x i64>.
define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%cmp = icmp sgt <4 x i32> %arg1, %arg0
%res = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
; _mm_cmplt_pd pattern - ordered less-than compare (fcmp olt) of two
; <2 x double> vectors, sign-extended to <2 x i64> and bitcast back to
; <2 x double>. The AVX512 runs compare into %k0 and expand it with vpmovm2q.
define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp olt <2 x double> %a0, %a1
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; _mm_cmplt_sd pattern - calls the llvm.x86.sse2.cmp.sd intrinsic on (%a0, %a1)
; with predicate immediate 1, which the check lines show selecting as
; cmpltsd / vcmpltsd.
define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
ret <2 x double> %res
}
; _mm_cmpneq_pd pattern - unordered-or-not-equal compare (fcmp une) of two
; <2 x double> vectors, sign-extended to <2 x i64> and bitcast back to
; <2 x double>. The AVX512 runs compare into %k0 and expand it with vpmovm2q.
define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp une <2 x double> %a0, %a1
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; _mm_cmpneq_sd pattern - calls the llvm.x86.sse2.cmp.sd intrinsic on (%a0, %a1)
; with predicate immediate 4, which the check lines show selecting as
; cmpneqsd / vcmpneqsd.
define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
ret <2 x double> %res
}
; _mm_cmpnge_pd pattern - "not (a0 >= a1)" is written as the unordered-or-greater
; compare "fcmp ugt %a1, %a0" with the operands swapped, so the check lines
; expect cmpnlepd with swapped registers (plus a movapd into %xmm0 on SSE).
; The result is sign-extended to <2 x i64> and bitcast back to <2 x double>.
define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp ugt <2 x double> %a1, %a0
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; _mm_cmpnge_sd pattern - calls llvm.x86.sse2.cmp.sd with the operands swapped
; (%a1, %a0) and predicate immediate 6 (selected as cmpnlesd), then rebuilds the
; result from element 0 of the compare and element 1 of %a0. The check lines
; expect that merge as movsd on SSE and vblendpd on AVX.
define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
%ext0 = extractelement <2 x double> %cmp, i32 0
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
%ext1 = extractelement <2 x double> %a0, i32 1
%ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
ret <2 x double> %ins1
}
; _mm_cmpngt_pd pattern - "not (a0 > a1)" is written as the unordered-or-
; greater-equal compare "fcmp uge %a1, %a0" with the operands swapped, so the
; check lines expect cmpnltpd with swapped registers (plus a movapd into %xmm0
; on SSE). The result is sign-extended to <2 x i64> and bitcast back.
define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp uge <2 x double> %a1, %a0
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; _mm_cmpngt_sd pattern - calls llvm.x86.sse2.cmp.sd with the operands swapped
; (%a1, %a0) and predicate immediate 5 (selected as cmpnltsd), then rebuilds the
; result from element 0 of the compare and element 1 of %a0. The check lines
; expect that merge as movsd on SSE and vblendpd on AVX.
define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
%ext0 = extractelement <2 x double> %cmp, i32 0
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
%ext1 = extractelement <2 x double> %a0, i32 1
%ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
ret <2 x double> %ins1
}
; _mm_cmpnle_pd pattern - "not (a0 <= a1)" is written as the unordered-or-greater
; compare "fcmp ugt %a0, %a1", sign-extended to <2 x i64> and bitcast back to
; <2 x double>. The AVX512 runs compare into %k0 and expand it with vpmovm2q.
define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp ugt <2 x double> %a0, %a1
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; _mm_cmpnle_sd pattern - calls the llvm.x86.sse2.cmp.sd intrinsic on (%a0, %a1)
; with predicate immediate 6, which the check lines show selecting as
; cmpnlesd / vcmpnlesd.
define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
ret <2 x double> %res
}
; _mm_cmpnlt_pd pattern - "not (a0 < a1)" is written as the unordered-or-
; greater-equal compare "fcmp uge %a0, %a1", sign-extended to <2 x i64> and
; bitcast back to <2 x double>. The AVX512 runs compare into %k0 and expand it
; with vpmovm2q.
define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp uge <2 x double> %a0, %a1
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; _mm_cmpnlt_sd pattern - calls the llvm.x86.sse2.cmp.sd intrinsic on (%a0, %a1)
; with predicate immediate 5, which the check lines show selecting as
; cmpnltsd / vcmpnltsd.
define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
ret <2 x double> %res
}
; _mm_cmpord_pd pattern - ordered compare (fcmp ord) of two <2 x double>
; vectors, sign-extended to <2 x i64> and bitcast back to <2 x double>. The
; AVX512 runs compare into %k0 and expand it with vpmovm2q.
define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp ord <2 x double> %a0, %a1
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; _mm_cmpord_sd pattern - calls the llvm.x86.sse2.cmp.sd intrinsic on (%a0, %a1)
; with predicate immediate 7, which the check lines show selecting as
; cmpordsd / vcmpordsd.
define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
ret <2 x double> %res
}
; _mm_cmpunord_pd pattern - unordered compare (fcmp uno) of two <2 x double>
; vectors, sign-extended to <2 x i64> and bitcast back to <2 x double>. The
; AVX512 runs compare into %k0 and expand it with vpmovm2q.
define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%fcmp = fcmp uno <2 x double> %a0, %a1
%sext = sext <2 x i1> %fcmp to <2 x i64>
%res = bitcast <2 x i64> %sext to <2 x double>
ret <2 x double> %res
}
; _mm_cmpunord_sd pattern - calls the llvm.x86.sse2.cmp.sd intrinsic on
; (%a0, %a1) with predicate immediate 3, which the check lines show selecting
; as cmpunordsd / vcmpunordsd.
define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
ret <2 x double> %res
}
; _mm_comieq_sd pattern - calls the llvm.x86.sse2.comieq.sd intrinsic, which
; returns an i32. The check lines expect (v)comisd followed by setnp and sete
; combined with andb, then zero-extended into %eax.
define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_sd:
; SSE: # %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
; _mm_comige_sd pattern - calls the llvm.x86.sse2.comige.sd intrinsic, which
; returns an i32. The check lines expect %eax to be zeroed first, then
; (v)comisd %xmm1, %xmm0 followed by setae %al.
define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comige_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
; _mm_comigt_sd pattern - calls the llvm.x86.sse2.comigt.sd intrinsic, which
; returns an i32. The check lines expect %eax to be zeroed first, then
; (v)comisd %xmm1, %xmm0 followed by seta %al.
define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
; _mm_comile_sd pattern - calls the llvm.x86.sse2.comile.sd intrinsic, which
; returns an i32. The check lines expect the (v)comisd operands to be swapped
; (%xmm0, %xmm1) and the result taken with setae, after zeroing %eax.
define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comile_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
; _mm_comilt_sd pattern - calls the llvm.x86.sse2.comilt.sd intrinsic, which
; returns an i32. The check lines expect the (v)comisd operands to be swapped
; (%xmm0, %xmm1) and the result taken with seta, after zeroing %eax.
define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
; _mm_comineq_sd pattern - calls the llvm.x86.sse2.comineq.sd intrinsic, which
; returns an i32. The check lines expect (v)comisd followed by setp and setne
; combined with orb, then zero-extended into %eax.
define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_sd:
; SSE: # %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
; _mm_cvtepi32_pd pattern - the input is reinterpreted as <4 x i32>, lanes 0 and
; 1 are picked out with a shufflevector, and the pair is converted with sitofp
; to <2 x double>. The check lines expect a single (v)cvtdq2pd.
define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_pd:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
%res = sitofp <2 x i32> %ext to <2 x double>
ret <2 x double> %res
}
; _mm_cvtepi32_ps pattern - the input is reinterpreted as <4 x i32> and all four
; lanes are converted with sitofp to <4 x float>. The check lines expect a
; single (v)cvtdq2ps.
define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_ps:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res = sitofp <4 x i32> %arg0 to <4 x float>
ret <4 x float> %res
}
; _mm_cvtpd_epi32 pattern - calls the llvm.x86.sse2.cvtpd2dq intrinsic and
; bitcasts its <4 x i32> result to <2 x i64>. The check lines expect a single
; (v)cvtpd2dq.
define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
; _mm_cvtpd_ps pattern - calls the llvm.x86.sse2.cvtpd2ps intrinsic, which takes
; a <2 x double> and returns a <4 x float>. The check lines expect a single
; (v)cvtpd2ps.
define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_ps:
; SSE: # %bb.0:
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
; _mm_cvtps_epi32 pattern - calls the llvm.x86.sse2.cvtps2dq intrinsic and
; bitcasts its <4 x i32> result to <2 x i64>. The check lines expect a single
; (v)cvtps2dq.
define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
; _mm_cvtps_pd pattern - lanes 0 and 1 of the <4 x float> input are picked out
; with a shufflevector and widened with fpext to <2 x double>. The check lines
; expect a single (v)cvtps2pd.
define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_pd:
; SSE: # %bb.0:
; SSE-NEXT: cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
%res = fpext <2 x float> %ext to <2 x double>
ret <2 x double> %res
}
; _mm_cvtsd_f64 pattern - returns element 0 of the <2 x double> argument as a
; scalar double. On the i386 runs the check lines expect the low half of %xmm0
; to be stored to an 8-byte-aligned stack slot and reloaded with fldl; on the
; x86_64 runs the function body is just retq.
define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsd_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-SSE-NEXT: movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24]
; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT: popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsd_f64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55]
; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX1-NEXT: vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_f64:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55]
; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX512-NEXT: vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtsd_f64:
; X64: # %bb.0:
; X64-NEXT: retq # encoding: [0xc3]
%res = extractelement <2 x double> %a0, i32 0
ret double %res
}
; _mm_cvtsd_si32 pattern - calls the llvm.x86.sse2.cvtsd2si intrinsic, which
; takes a <2 x double> and returns an i32. The check lines expect a single
; (v)cvtsd2si from %xmm0 into %eax.
define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsd_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsd_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
; _mm_cvtsd_ss pattern - calls the llvm.x86.sse2.cvtsd2ss intrinsic on
; (%a0, %a1), both passed in registers. The check lines expect a single
; (v)cvtsd2ss from %xmm1 into %xmm0. The function is marked nounwind like the
; other tests in this file.
define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cvtsd_ss:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsd_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
; Load-folding variant of the _mm_cvtsd_ss pattern - the <2 x double> operand is
; loaded from %p1 before the llvm.x86.sse2.cvtsd2ss call, and the check lines
; expect the load to be folded into the memory operand of (v)cvtsd2ss. On the
; i386 runs the pointer is first read from the stack; on the x86_64 runs it is
; used directly from %rdi. The function is marked nounwind like the other tests
; in this file.
define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsd_ss_load:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
ret <4 x float> %res
}
; _mm_cvtsi128_si32 pattern - the input is reinterpreted as <4 x i32> and
; element 0 is returned as an i32. The check lines expect a single (v)movd from
; %xmm0 into %eax.
define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsi128_si32:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsi128_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsi128_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res = extractelement <4 x i32> %arg0, i32 0
ret i32 %res
}
; _mm_cvtsi32_sd pattern - converts the i32 argument with sitofp to double and
; inserts it into element 0 of %a0. The check lines expect a single
; (v)cvtsi2sdl whose integer source is a stack slot on the i386 runs and %edi
; on the x86_64 runs.
define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsi2sdl %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_sd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_sd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%cvt = sitofp i32 %a1 to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}
; Builds a 128-bit integer vector whose lane 0 is %a0 and whose lanes 1-3 are
; zero, then returns it as <2 x i64>. The i386 runs are expected to load the
; stack argument with (v)movss (shown as mem[0],zero,zero,zero); the x86_64
; runs are expected to use (v)movd from %edi.
define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
; Lane 0 takes the scalar; lanes 1, 2 and 3 are explicitly zeroed one by one.
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 0, i32 1
%res2 = insertelement <4 x i32> %res1, i32 0, i32 2
%res3 = insertelement <4 x i32> %res2, i32 0, i32 3
%res = bitcast <4 x i32> %res3 to <2 x i64>
ret <2 x i64> %res
}
; Widens element 0 of the float vector %a1 to double and writes it into
; element 0 of %a0; element 1 of %a0 is preserved. All configurations are
; expected to fold the extract/fpext/insert sequence into one (v)cvtss2sd.
define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cvtss_sd:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
; Scalar float -> double extension of lane 0, merged into the low lane of %a0.
%ext = extractelement <4 x float> %a1, i32 0
%cvt = fpext float %ext to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}
; Passes %a0 to the llvm.x86.sse2.cvttpd2dq intrinsic and returns the
; <4 x i32> result reinterpreted as <2 x i64>. All configurations are expected
; to emit a single (v)cvttpd2dq operating in place on %xmm0.
define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttpd_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttpd_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttpd_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
; The bitcast only changes the lane view to match the <2 x i64> return type.
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
; Declaration of the intrinsic called by @test_mm_cvttpd_epi32.
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
; Passes %a0 to the llvm.x86.sse2.cvttps2dq intrinsic and returns the
; <4 x i32> result reinterpreted as <2 x i64>. All configurations are expected
; to emit a single (v)cvttps2dq operating in place on %xmm0.
define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttps_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttps_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttps_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
; The bitcast only changes the lane view to match the <2 x i64> return type.
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
; Declaration of the intrinsic called by @test_mm_cvttps_epi32.
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
; Passes %a0 to the llvm.x86.sse2.cvttsd2si intrinsic and returns its i32
; result directly. All configurations are expected to emit a single
; (v)cvttsd2si from %xmm0 into %eax.
define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttsd_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttsd_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttsd_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
ret i32 %res
}
; Declaration of the intrinsic called by @test_mm_cvttsd_si32.
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
; Element-wise division of two <2 x double> vectors using a plain IR fdiv.
; All configurations are expected to emit a single packed (v)divpd.
define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_pd:
; SSE: # %bb.0:
; SSE-NEXT: divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = fdiv <2 x double> %a0, %a1
ret <2 x double> %res
}
; Divides element 0 of %a0 by element 0 of %a1 and writes the quotient back
; into element 0 of %a0; element 1 of %a0 is preserved. All configurations are
; expected to fold the extract/fdiv/insert sequence into one scalar (v)divsd.
define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
; Only the low lanes take part in the division.
%ext0 = extractelement <2 x double> %a0, i32 0
%ext1 = extractelement <2 x double> %a1, i32 0
%fdiv = fdiv double %ext0, %ext1
; The result is merged into %a0, so its upper lane flows through untouched.
%res = insertelement <2 x double> %a0, double %fdiv, i32 0
ret <2 x double> %res
}
; Extracts 16-bit lane 1 of %a0 (viewed as <8 x i16>) and zero-extends it to
; i32. All configurations are expected to emit (v)pextrw with immediate 1
; followed by a movzwl of %ax into %eax.
define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_extract_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01]
; SSE-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_extract_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX1-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_extract_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX512-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
; The lane index is the constant 1, which becomes the $1 immediate above.
%ext = extractelement <8 x i16> %arg0, i32 1
%res = zext i16 %ext to i32
ret i32 %res
}
; Replaces 16-bit lane 1 of %a0 (viewed as <8 x i16>) with the scalar %a1 and
; returns the vector as <2 x i64>. The expected lowering is (v)pinsrw with
; immediate 1; the i386 runs first load the stack argument with movzwl, the
; x86_64 runs insert straight from %edi.
define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
; X86-SSE-LABEL: test_mm_insert_epi16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT: pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_insert_epi16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_insert_epi16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_insert_epi16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_insert_epi16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_insert_epi16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
; The lane index is the constant 1, which becomes the $1 immediate above.
%res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
; Calls the llvm.x86.sse2.lfence intrinsic and returns. The assertions use the
; prefix shared by every configuration in the file header, so the same
; expectation (a bare lfence followed by the return) applies to all of them.
define void @test_mm_lfence() nounwind {
; CHECK-LABEL: test_mm_lfence:
; CHECK: # %bb.0:
; CHECK-NEXT: lfence # encoding: [0x0f,0xae,0xe8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
call void @llvm.x86.sse2.lfence()
ret void
}
; Declaration of the intrinsic called by @test_mm_lfence.
declare void @llvm.x86.sse2.lfence() nounwind readnone
; Loads a full <2 x double> from the address in %a0 with 16-byte alignment.
; Every configuration is expected to use the aligned (v)movaps form; the i386
; runs first fetch the pointer argument from the stack into %eax.
define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
; align 16 is what permits the aligned move expected above.
%res = load <2 x double>, <2 x double>* %arg0, align 16
ret <2 x double> %res
}
; Loads one double from %a0 into element 0 of the result and sets element 1
; to 0.0. Every configuration is expected to emit a single (v)movsd from
; memory, shown in the shuffle comment as mem[0],zero.
define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_sd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_sd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
; The scalar load is only byte-aligned (align 1).
%ld = load double, double* %a0, align 1
%res0 = insertelement <2 x double> undef, double %ld, i32 0
; Explicitly zero the upper lane.
%res1 = insertelement <2 x double> %res0, double 0.0, i32 1
ret <2 x double> %res1
}
; Loads a full <2 x i64> from %a0 with 16-byte alignment. Every configuration
; is expected to use the aligned (v)movaps form; the i386 runs first fetch the
; pointer argument from the stack into %eax.
define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
; align 16 is what permits the aligned move expected above.
%res = load <2 x i64>, <2 x i64>* %a0, align 16
ret <2 x i64> %res
}
; Loads one double from %a0 and places it in both elements of the result.
; The SSE2-only runs are expected to emit movsd followed by movlhps to
; duplicate the low lane; the AVX and AVX-512 runs are expected to fold the
; load and duplication into a single vmovddup with a memory operand.
define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load1_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load1_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovddup (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load1_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load1_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load1_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a0, align 8
; The same loaded scalar is inserted into lane 0 and lane 1 (a splat).
%res0 = insertelement <2 x double> undef, double %ld, i32 0
%res1 = insertelement <2 x double> %res0, double %ld, i32 1
ret <2 x double> %res1
}
; Loads one double from %a1 and writes it into element 1 (the high lane) of
; %a0; element 0 is preserved. Every configuration is expected to emit a
; single (v)movhpd from memory, shown as xmm0[0],mem[0].
define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadh_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movhpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x16,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movhpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x16,0x07]
; X64-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a1, align 8
; Index 1 selects the high lane of the destination vector.
%res = insertelement <2 x double> %a0, double %ld, i32 1
ret <2 x double> %res
}
; Loads one i64 through %a1 (reinterpreted as i64*) into element 0 of the
; result and sets element 1 to zero. The %a0 parameter is not referenced by
; the body. Every configuration is expected to emit a single (v)movsd from
; memory, shown as mem[0],zero.
define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_epi64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_epi64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_epi64:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_epi64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_epi64:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_epi64:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
; Only the first 8 bytes behind the vector pointer are read, byte-aligned.
%bc = bitcast <2 x i64>* %a1 to i64*
%ld = load i64, i64* %bc, align 1
%res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
; Explicitly zero the upper lane.
%res1 = insertelement <2 x i64> %res0, i64 0, i32 1
ret <2 x i64> %res1
}
; Loads one double from %a1 and writes it into element 0 (the low lane) of
; %a0; element 1 is preserved. Every configuration is expected to emit a
; single (v)movlpd from memory, shown as mem[0],xmm0[1].
define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movlpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x12,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x12,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a1, align 8
; Index 0 selects the low lane of the destination vector.
%res = insertelement <2 x double> %a0, double %ld, i32 0
ret <2 x double> %res
}
; Loads a 16-byte-aligned <2 x double> from %a0 and returns it with its two
; elements swapped. The SSE2-only runs are expected to emit movapd followed by
; shufpd with immediate 1; the AVX and AVX-512 runs are expected to fold the
; load into a single vpermilpd with immediate 1 and a memory operand.
define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X86-SSE-NEXT: # xmm0 = xmm0[1,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadr_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX1-NEXT: # xmm0 = mem[1,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadr_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX512-NEXT: # xmm0 = mem[1,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadr_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X64-SSE-NEXT: # xmm0 = xmm0[1,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadr_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX1-NEXT: # xmm0 = mem[1,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadr_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX512-NEXT: # xmm0 = mem[1,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
%ld = load <2 x double>, <2 x double>* %arg0, align 16
; Mask <1, 0> reverses the element order of the loaded vector.
%res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %res
}
; Loads a full <2 x double> from %a0 with only byte alignment (align 1).
; Every configuration is expected to use the unaligned (v)movups form; the
; i386 runs first fetch the pointer argument from the stack into %eax.
define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
; align 1 rules out the aligned move, hence the unaligned form expected above.
%res = load <2 x double>, <2 x double>* %arg0, align 1
ret <2 x double> %res
}
; Loads a full <2 x i64> from %a0 with only byte alignment (align 1). Every
; configuration is expected to use the unaligned (v)movups form; the i386
; runs first fetch the pointer argument from the stack into %eax.
define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
; align 1 rules out the aligned move, hence the unaligned form expected above.
%res = load <2 x i64>, <2 x i64>* %a0, align 1
ret <2 x i64> %res
}
; Loads a byte-aligned i64 through the i8* argument %A into element 0 of a
; <2 x i64> whose element 1 is the constant zero. Every configuration is
; expected to emit a single (v)movsd from memory, shown as mem[0],zero.
; This function has no nounwind attribute and names its entry block; the
; block name appears as "# %entry" in the expected block header.
define <2 x i64> @test_mm_loadu_si64(i8* nocapture readonly %A) {
; X86-SSE-LABEL: test_mm_loadu_si64:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_si64:
; X86-AVX1: # %bb.0: # %entry
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_si64:
; X86-AVX512: # %bb.0: # %entry
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_si64:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_si64:
; X64-AVX1: # %bb.0: # %entry
; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_si64:
; X64-AVX512: # %bb.0: # %entry
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%__v.i = bitcast i8* %A to i64*
%0 = load i64, i64* %__v.i, align 1
; The base constant already has lane 1 = 0; only the undef lane 0 is filled.
%vecinit1.i = insertelement <2 x i64> <i64 undef, i64 0>, i64 %0, i32 0
ret <2 x i64> %vecinit1.i
}
; Loads a byte-aligned i32 through the i8* argument %A into lane 0 of a
; <4 x i32> whose lanes 1-3 are the constant zero, then returns it as
; <2 x i64>. Every configuration is expected to emit a single (v)movss from
; memory, shown as mem[0],zero,zero,zero.
; This function has no nounwind attribute and names its entry block; the
; block name appears as "# %entry" in the expected block header.
define <2 x i64> @test_mm_loadu_si32(i8* nocapture readonly %A) {
; X86-SSE-LABEL: test_mm_loadu_si32:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_si32:
; X86-AVX1: # %bb.0: # %entry
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_si32:
; X86-AVX512: # %bb.0: # %entry
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_si32:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_si32:
; X64-AVX1: # %bb.0: # %entry
; X64-AVX1-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_si32:
; X64-AVX512: # %bb.0: # %entry
; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%__v.i = bitcast i8* %A to i32*
%0 = load i32, i32* %__v.i, align 1
; The base constant already has lanes 1-3 = 0; only the undef lane 0 is filled.
%vecinit3.i = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %0, i32 0
%1 = bitcast <4 x i32> %vecinit3.i to <2 x i64>
ret <2 x i64> %1
}
; Loads a byte-aligned i16 through the i8* argument %A into lane 0 of an
; <8 x i16> whose lanes 1-7 are the constant zero, then returns it as
; <2 x i64>. Every configuration is expected to emit a zero-extending movzwl
; load into %eax followed by (v)movd into %xmm0.
; This function has no nounwind attribute and names its entry block; the
; block name appears as "# %entry" in the expected block header.
define <2 x i64> @test_mm_loadu_si16(i8* nocapture readonly %A) {
; X86-SSE-LABEL: test_mm_loadu_si16:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_si16:
; X86-AVX1: # %bb.0: # %entry
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_si16:
; X86-AVX512: # %bb.0: # %entry
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_si16:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_si16:
; X64-AVX1: # %bb.0: # %entry
; X64-AVX1-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_si16:
; X64-AVX512: # %bb.0: # %entry
; X64-AVX512-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%__v.i = bitcast i8* %A to i16*
%0 = load i16, i16* %__v.i, align 1
; The base constant already has lanes 1-7 = 0; only the undef lane 0 is filled.
%vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %0, i32 0
%1 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
ret <2 x i64> %1
}
; Checks selection of the pmadd.wd intrinsic.
; Both <2 x i64> arguments are reinterpreted as <8 x i16>, passed to
; @llvm.x86.sse2.pmadd.wd, and the <4 x i32> result is reinterpreted back to
; <2 x i64>. Expected output is a single pmaddwd (vpmaddwd on the AVX runs);
; the AVX512 run expects the same VEX encoding, annotated as EVEX-to-VEX
; compression.
define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_madd_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_madd_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_madd_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
; Intrinsic used by test_mm_madd_epi16 above.
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
; Checks selection of the maskmov.dqu intrinsic (a masked store through %a2).
; Both vector arguments are reinterpreted as <16 x i8> and passed, together
; with the pointer, to @llvm.x86.sse2.maskmov.dqu.
; In the expected output the instruction lists only the two xmm operands. On
; i386 the checks show %edi being saved, loaded with the pointer from the
; stack, and restored afterwards. The x86_64 checks contain no extra moves
; (presumably because %a2 already arrives in %rdi -- not visible in the
; checks themselves).
define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
; X86-SSE-LABEL: test_mm_maskmoveu_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %edi # encoding: [0x57]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X86-SSE-NEXT: popl %edi # encoding: [0x5f]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_maskmoveu_si128:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %edi # encoding: [0x57]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X86-AVX-NEXT: popl %edi # encoding: [0x5f]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_maskmoveu_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_maskmoveu_si128:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
ret void
}
; Intrinsic used by test_mm_maskmoveu_si128 above.
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
; Signed 16-bit element-wise maximum written as compare + select.
; The arguments are reinterpreted as <8 x i16>; an icmp sgt picks, per element,
; %arg0 when it is greater and %arg1 otherwise. The checks expect this pair to
; be selected as a single pmaxsw (vpmaxsw on the AVX runs).
define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_max_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
; Signed compare; the select keeps the larger element of each pair.
%cmp = icmp sgt <8 x i16> %arg0, %arg1
%sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
%bc = bitcast <8 x i16> %sel to <2 x i64>
ret <2 x i64> %bc
}
; Unsigned 8-bit element-wise maximum written as compare + select.
; The arguments are reinterpreted as <16 x i8>; an icmp ugt picks, per element,
; %arg0 when it is greater and %arg1 otherwise. The checks expect this pair to
; be selected as a single pmaxub (vpmaxub on the AVX runs).
define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_max_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
; Unsigned compare; the select keeps the larger element of each pair.
%cmp = icmp ugt <16 x i8> %arg0, %arg1
%sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
%bc = bitcast <16 x i8> %sel to <2 x i64>
ret <2 x i64> %bc
}
; Checks selection of the packed-double max intrinsic.
; The two <2 x double> arguments go straight to @llvm.x86.sse2.max.pd and the
; result is returned unchanged. Expected output is a single maxpd (vmaxpd on
; the AVX runs).
define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_max_pd:
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %res
}
; Intrinsic used by test_mm_max_pd above.
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
; Checks selection of the scalar-double max intrinsic.
; The two <2 x double> arguments go straight to @llvm.x86.sse2.max.sd and the
; result is returned unchanged. Expected output is a single maxsd (vmaxsd on
; the AVX runs).
define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_max_sd:
; SSE: # %bb.0:
; SSE-NEXT: maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %res
}
; Intrinsic used by test_mm_max_sd above.
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
; Checks that a call to the mfence intrinsic is selected as a single mfence
; instruction. The expected output is identical for every llc invocation, so
; the assertions use the prefix shared by all of them.
define void @test_mm_mfence() nounwind {
; CHECK-LABEL: test_mm_mfence:
; CHECK: # %bb.0:
; CHECK-NEXT: mfence # encoding: [0x0f,0xae,0xf0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
call void @llvm.x86.sse2.mfence()
ret void
}
; A fence exists to order memory accesses, so its declaration must not claim
; that it does not touch memory; only nounwind is stated here.
declare void @llvm.x86.sse2.mfence() nounwind
; Signed 16-bit element-wise minimum written as compare + select.
; The arguments are reinterpreted as <8 x i16>; an icmp slt picks, per element,
; %arg0 when it is smaller and %arg1 otherwise. The checks expect this pair to
; be selected as a single pminsw (vpminsw on the AVX runs).
define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
; Signed compare; the select keeps the smaller element of each pair.
%cmp = icmp slt <8 x i16> %arg0, %arg1
%sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
%bc = bitcast <8 x i16> %sel to <2 x i64>
ret <2 x i64> %bc
}
define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%cmp = icmp ult <16 x i8> %arg0, %arg1
%sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x