| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32 |
| ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64 |
| |
| ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c |
| |
| ; _mm_add_epi8: plain `add` on the operands viewed as <16 x i8>; both targets must emit a single paddb. |
| define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_add_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: paddb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_add_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: paddb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %res = add <16 x i8> %arg0, %arg1 |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_add_epi16: plain `add` on the operands viewed as <8 x i16>; both targets must emit a single paddw. |
| define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_add_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: paddw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_add_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: paddw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = add <8 x i16> %arg0, %arg1 |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_add_epi32: plain `add` on the operands viewed as <4 x i32>; both targets must emit a single paddd. |
| define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_add_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: paddd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_add_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: paddd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %res = add <4 x i32> %arg0, %arg1 |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_add_epi64: `add` directly on <2 x i64> (no bitcasts needed); both targets must emit a single paddq. |
| define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_add_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: paddq %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_add_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: paddq %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = add <2 x i64> %a0, %a1 |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_add_pd: vector `fadd` on <2 x double>; both targets must emit a single addpd. |
| define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_add_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: addpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_add_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: addpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = fadd <2 x double> %a0, %a1 |
| ret <2 x double> %res |
| } |
| |
| ; _mm_add_sd: scalar `fadd` of element 0 of each operand, re-inserted into element 0 of %a0 |
| ; (element 1 of %a0 passes through); the extract/fadd/insert sequence must fold to one addsd. |
| define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_add_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: addsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_add_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: addsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %ext0 = extractelement <2 x double> %a0, i32 0 |
| %ext1 = extractelement <2 x double> %a1, i32 0 |
| %fadd = fadd double %ext0, %ext1 |
| %res = insertelement <2 x double> %a0, double %fadd, i32 0 |
| ret <2 x double> %res |
| } |
| |
| ; _mm_adds_epi8: calls the llvm.x86.sse2.padds.b intrinsic on <16 x i8>; both targets must emit paddsb. |
| define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_adds_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: paddsb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_adds_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: paddsb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1) |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; _mm_adds_epi16: calls the llvm.x86.sse2.padds.w intrinsic on <8 x i16>; both targets must emit paddsw. |
| define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_adds_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: paddsw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_adds_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: paddsw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; _mm_adds_epu8: calls the llvm.x86.sse2.paddus.b intrinsic on <16 x i8>; both targets must emit paddusb. |
| define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_adds_epu8: |
| ; X32: # BB#0: |
| ; X32-NEXT: paddusb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_adds_epu8: |
| ; X64: # BB#0: |
| ; X64-NEXT: paddusb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1) |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; _mm_adds_epu16: calls the llvm.x86.sse2.paddus.w intrinsic on <8 x i16>; both targets must emit paddusw. |
| define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_adds_epu16: |
| ; X32: # BB#0: |
| ; X32-NEXT: paddusw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_adds_epu16: |
| ; X64: # BB#0: |
| ; X64-NEXT: paddusw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; _mm_and_pd: bitwise `and` performed on the doubles reinterpreted as <4 x i32>; the CHECK lines expect andps. |
| define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_and_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: andps %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_and_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: andps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x double> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x double> %a1 to <4 x i32> |
| %res = and <4 x i32> %arg0, %arg1 |
| %bc = bitcast <4 x i32> %res to <2 x double> |
| ret <2 x double> %bc |
| } |
| |
| ; _mm_and_si128: bitwise `and` directly on <2 x i64>; the CHECK lines expect andps on both targets. |
| define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_and_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: andps %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_and_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: andps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = and <2 x i64> %a0, %a1 |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_andnot_pd: (~%a0) & %a1 on the doubles reinterpreted as <4 x i32>; the xor-with-all-ones |
| ; plus `and` pair is expected to fold into a single andnps. |
| define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_andnot_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: andnps %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_andnot_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: andnps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x double> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x double> %a1 to <4 x i32> |
| %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %res = and <4 x i32> %not, %arg1 |
| %bc = bitcast <4 x i32> %res to <2 x double> |
| ret <2 x double> %bc |
| } |
| |
| ; _mm_andnot_si128: (~%a0) & %a1 on <2 x i64>. Unlike test_mm_andnot_pd, the recorded output is not |
| ; folded: an all-ones register is materialised with pcmpeqd, then pxor and pand are applied. |
| define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_andnot_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; X32-NEXT: pxor %xmm2, %xmm0 |
| ; X32-NEXT: pand %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_andnot_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; X64-NEXT: pxor %xmm2, %xmm0 |
| ; X64-NEXT: pand %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %not = xor <2 x i64> %a0, <i64 -1, i64 -1> |
| %res = and <2 x i64> %not, %a1 |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_avg_epu8: calls the llvm.x86.sse2.pavg.b intrinsic on <16 x i8>; both targets must emit pavgb. |
| define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_avg_epu8: |
| ; X32: # BB#0: |
| ; X32-NEXT: pavgb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_avg_epu8: |
| ; X64: # BB#0: |
| ; X64-NEXT: pavgb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| ; Declaration lists parameter types only, matching the other intrinsic declarations in this file. |
| declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; _mm_avg_epu16: calls the llvm.x86.sse2.pavg.w intrinsic on <8 x i16>; both targets must emit pavgw. |
| define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_avg_epu16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pavgw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_avg_epu16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pavgw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; _mm_bslli_si128 by 5 bytes: shuffle of (zero, %arg0) with mask 11..26, i.e. five zero bytes |
| ; followed by bytes 0..10 of the input; must be selected as pslldq. |
| define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_bslli_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_bslli_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_bsrli_si128 by 5 bytes: shuffle of (%arg0, zero) with mask 5..20, i.e. bytes 5..15 of the |
| ; input followed by five zero bytes; must be selected as psrldq. |
| define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_bsrli_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_bsrli_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_castpd_ps: <2 x double> -> <4 x float> bitcast; no instruction other than the return is expected. |
| define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_castpd_ps: |
| ; X32: # BB#0: |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_castpd_ps: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| %res = bitcast <2 x double> %a0 to <4 x float> |
| ret <4 x float> %res |
| } |
| |
| ; _mm_castpd_si128: <2 x double> -> <2 x i64> bitcast; no instruction other than the return is expected. |
| define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_castpd_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_castpd_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| %res = bitcast <2 x double> %a0 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_castps_pd: <4 x float> -> <2 x double> bitcast; no instruction other than the return is expected. |
| define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind { |
| ; X32-LABEL: test_mm_castps_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_castps_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| %res = bitcast <4 x float> %a0 to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_castps_si128: <4 x float> -> <2 x i64> bitcast; no instruction other than the return is expected. |
| define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind { |
| ; X32-LABEL: test_mm_castps_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_castps_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| %res = bitcast <4 x float> %a0 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_castsi128_pd: <2 x i64> -> <2 x double> bitcast; no instruction other than the return is expected. |
| define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_castsi128_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_castsi128_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| %res = bitcast <2 x i64> %a0 to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_castsi128_ps: <2 x i64> -> <4 x float> bitcast; no instruction other than the return is expected. |
| define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_castsi128_ps: |
| ; X32: # BB#0: |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_castsi128_ps: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| %res = bitcast <2 x i64> %a0 to <4 x float> |
| ret <4 x float> %res |
| } |
| |
| ; _mm_clflush: calls the llvm.x86.sse2.clflush intrinsic on the pointer argument. On i386 the |
| ; pointer is first loaded from the stack into %eax; on x86-64 it arrives in %rdi. |
| define void @test_mm_clflush(i8* %a0) nounwind { |
| ; X32-LABEL: test_mm_clflush: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: clflush (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_clflush: |
| ; X64: # BB#0: |
| ; X64-NEXT: clflush (%rdi) |
| ; X64-NEXT: retq |
| call void @llvm.x86.sse2.clflush(i8* %a0) |
| ret void |
| } |
| ; Not `readnone`: the call returns void and exists only for its effect, so declaring it free of |
| ; memory effects would contradict what this test expects (the call must survive as clflush). |
| declare void @llvm.x86.sse2.clflush(i8*) nounwind |
| |
| ; _mm_cmpeq_epi8: `icmp eq` on <16 x i8> sign-extended back to i8 lanes; must be selected as pcmpeqb. |
| define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpeq_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpeqb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpeq_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpeqb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %cmp = icmp eq <16 x i8> %arg0, %arg1 |
| %res = sext <16 x i1> %cmp to <16 x i8> |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmpeq_epi16: `icmp eq` on <8 x i16> sign-extended back to i16 lanes; must be selected as pcmpeqw. |
| define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpeq_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpeqw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpeq_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpeqw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %cmp = icmp eq <8 x i16> %arg0, %arg1 |
| %res = sext <8 x i1> %cmp to <8 x i16> |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmpeq_epi32: `icmp eq` on <4 x i32> sign-extended back to i32 lanes; must be selected as pcmpeqd. |
| define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpeq_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpeqd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpeq_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpeqd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %cmp = icmp eq <4 x i32> %arg0, %arg1 |
| %res = sext <4 x i1> %cmp to <4 x i32> |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmpeq_pd: `fcmp oeq` on <2 x double>, sign-extended to an i64 mask and bitcast back to |
| ; double; must be selected as cmpeqpd. |
| define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpeq_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpeqpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpeq_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpeqpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp oeq <2 x double> %a0, %a1 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpeq_sd: llvm.x86.sse2.cmp.sd with immediate 0, which the CHECK lines show as cmpeqsd. |
| define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpeq_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpeqsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpeq_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpeqsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0) |
| ret <2 x double> %res |
| } |
| ; Shared by all of the scalar-double compare tests in this file; the i8 operand selects the predicate. |
| declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone |
| |
| ; _mm_cmpge_pd: expressed as `fcmp ole` with the operands swapped (%a1, %a0); the result is |
| ; computed in xmm1 by cmplepd and then copied to xmm0 with movapd. |
| define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpge_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmplepd %xmm0, %xmm1 |
| ; X32-NEXT: movapd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpge_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmplepd %xmm0, %xmm1 |
| ; X64-NEXT: movapd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp ole <2 x double> %a1, %a0 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpge_sd: cmp.sd with swapped operands (%a1, %a0) and immediate 2 (cmplesd). Because of the |
| ; swap, the result is rebuilt from element 0 of the compare and element 1 of %a0, which the CHECK |
| ; lines show as a movsd blend. |
| define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpge_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmplesd %xmm0, %xmm1 |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpge_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmplesd %xmm0, %xmm1 |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X64-NEXT: retq |
| %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2) |
| %ext0 = extractelement <2 x double> %cmp, i32 0 |
| %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 |
| %ext1 = extractelement <2 x double> %a0, i32 1 |
| %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 |
| ret <2 x double> %ins1 |
| } |
| |
| ; _mm_cmpgt_epi8: `icmp sgt` on <16 x i8> sign-extended back to i8 lanes; must be selected as pcmpgtb. |
| define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpgt_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpgtb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpgt_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpgtb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %cmp = icmp sgt <16 x i8> %arg0, %arg1 |
| %res = sext <16 x i1> %cmp to <16 x i8> |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmpgt_epi16: `icmp sgt` on <8 x i16> sign-extended back to i16 lanes; must be selected as pcmpgtw. |
| define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpgt_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpgtw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpgt_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpgtw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %cmp = icmp sgt <8 x i16> %arg0, %arg1 |
| %res = sext <8 x i1> %cmp to <8 x i16> |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmpgt_epi32: `icmp sgt` on <4 x i32> sign-extended back to i32 lanes; must be selected as pcmpgtd. |
| define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpgt_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpgtd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpgt_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpgtd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %cmp = icmp sgt <4 x i32> %arg0, %arg1 |
| %res = sext <4 x i1> %cmp to <4 x i32> |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmpgt_pd: expressed as `fcmp olt` with the operands swapped (%a1, %a0); the result is |
| ; computed in xmm1 by cmpltpd and then copied to xmm0 with movapd. |
| define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpgt_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpltpd %xmm0, %xmm1 |
| ; X32-NEXT: movapd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpgt_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpltpd %xmm0, %xmm1 |
| ; X64-NEXT: movapd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp olt <2 x double> %a1, %a0 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpgt_sd: cmp.sd with swapped operands (%a1, %a0) and immediate 1 (cmpltsd). Because of the |
| ; swap, the result is rebuilt from element 0 of the compare and element 1 of %a0, which the CHECK |
| ; lines show as a movsd blend. |
| define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpgt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpltsd %xmm0, %xmm1 |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpgt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpltsd %xmm0, %xmm1 |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X64-NEXT: retq |
| %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1) |
| %ext0 = extractelement <2 x double> %cmp, i32 0 |
| %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 |
| %ext1 = extractelement <2 x double> %a0, i32 1 |
| %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 |
| ret <2 x double> %ins1 |
| } |
| |
| ; _mm_cmple_pd: `fcmp ole` on <2 x double> in natural operand order; must be selected as cmplepd. |
| define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmple_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmplepd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmple_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmplepd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp ole <2 x double> %a0, %a1 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmple_sd: llvm.x86.sse2.cmp.sd with immediate 2, which the CHECK lines show as cmplesd. |
| define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmple_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmplesd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmple_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmplesd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2) |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmplt_epi8: expressed as `icmp sgt` with the operands swapped (%arg1, %arg0); pcmpgtb |
| ; leaves the result in xmm1, so a movdqa copies it to xmm0. |
| define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmplt_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpgtb %xmm0, %xmm1 |
| ; X32-NEXT: movdqa %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmplt_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpgtb %xmm0, %xmm1 |
| ; X64-NEXT: movdqa %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %cmp = icmp sgt <16 x i8> %arg1, %arg0 |
| %res = sext <16 x i1> %cmp to <16 x i8> |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmplt_epi16: expressed as `icmp sgt` with the operands swapped (%arg1, %arg0); pcmpgtw |
| ; leaves the result in xmm1, so a movdqa copies it to xmm0. |
| define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmplt_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpgtw %xmm0, %xmm1 |
| ; X32-NEXT: movdqa %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmplt_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpgtw %xmm0, %xmm1 |
| ; X64-NEXT: movdqa %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %cmp = icmp sgt <8 x i16> %arg1, %arg0 |
| %res = sext <8 x i1> %cmp to <8 x i16> |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmplt_epi32: expressed as `icmp sgt` with the operands swapped (%arg1, %arg0); pcmpgtd |
| ; leaves the result in xmm1, so a movdqa copies it to xmm0. |
| define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmplt_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: pcmpgtd %xmm0, %xmm1 |
| ; X32-NEXT: movdqa %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmplt_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: pcmpgtd %xmm0, %xmm1 |
| ; X64-NEXT: movdqa %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %cmp = icmp sgt <4 x i32> %arg1, %arg0 |
| %res = sext <4 x i1> %cmp to <4 x i32> |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; _mm_cmplt_pd: `fcmp olt` on <2 x double> in natural operand order; must be selected as cmpltpd. |
| define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmplt_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpltpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmplt_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpltpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp olt <2 x double> %a0, %a1 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmplt_sd: llvm.x86.sse2.cmp.sd with immediate 1, which the CHECK lines show as cmpltsd. |
| define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmplt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpltsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmplt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpltsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1) |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpneq_pd: `fcmp une` (unordered-or-not-equal) on <2 x double>; must be selected as cmpneqpd. |
| define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpneq_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpneqpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpneq_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpneqpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp une <2 x double> %a0, %a1 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpneq_sd: llvm.x86.sse2.cmp.sd with immediate 4, which the CHECK lines show as cmpneqsd. |
| define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpneq_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpneqsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpneq_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpneqsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4) |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpnge_pd: expressed as `fcmp ugt` with the operands swapped (%a1, %a0); the result is |
| ; computed in xmm1 by cmpnlepd and then copied to xmm0 with movapd. |
| define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpnge_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpnlepd %xmm0, %xmm1 |
| ; X32-NEXT: movapd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpnge_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpnlepd %xmm0, %xmm1 |
| ; X64-NEXT: movapd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp ugt <2 x double> %a1, %a0 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpnge_sd: cmp.sd with swapped operands (%a1, %a0) and immediate 6 (cmpnlesd). Because of |
| ; the swap, the result is rebuilt from element 0 of the compare and element 1 of %a0, which the |
| ; CHECK lines show as a movsd blend. |
| define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpnge_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpnlesd %xmm0, %xmm1 |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpnge_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpnlesd %xmm0, %xmm1 |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X64-NEXT: retq |
| %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6) |
| %ext0 = extractelement <2 x double> %cmp, i32 0 |
| %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 |
| %ext1 = extractelement <2 x double> %a0, i32 1 |
| %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 |
| ret <2 x double> %ins1 |
| } |
| |
| ; _mm_cmpngt_pd: expressed as `fcmp uge` with the operands swapped (%a1, %a0); the result is |
| ; computed in xmm1 by cmpnltpd and then copied to xmm0 with movapd. |
| define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpngt_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpnltpd %xmm0, %xmm1 |
| ; X32-NEXT: movapd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpngt_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpnltpd %xmm0, %xmm1 |
| ; X64-NEXT: movapd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp uge <2 x double> %a1, %a0 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpngt_sd: cmp.sd with swapped operands (%a1, %a0) and immediate 5 (cmpnltsd). Because of |
| ; the swap, the result is rebuilt from element 0 of the compare and element 1 of %a0, which the |
| ; CHECK lines show as a movsd blend. |
| define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpngt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpnltsd %xmm0, %xmm1 |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpngt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpnltsd %xmm0, %xmm1 |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X64-NEXT: retq |
| %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5) |
| %ext0 = extractelement <2 x double> %cmp, i32 0 |
| %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 |
| %ext1 = extractelement <2 x double> %a0, i32 1 |
| %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 |
| ret <2 x double> %ins1 |
| } |
| |
| ; _mm_cmpnle_pd: `fcmp ugt` on <2 x double> in natural operand order; must be selected as cmpnlepd. |
| define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpnle_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpnlepd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpnle_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpnlepd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp ugt <2 x double> %a0, %a1 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpnle_sd: llvm.x86.sse2.cmp.sd with immediate 6, which the CHECK lines show as cmpnlesd. |
| define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpnle_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpnlesd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpnle_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpnlesd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6) |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpnlt_pd: `fcmp uge` on <2 x double> in natural operand order; must be selected as cmpnltpd. |
| define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpnlt_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpnltpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpnlt_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpnltpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp uge <2 x double> %a0, %a1 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpnlt_sd: llvm.x86.sse2.cmp.sd with immediate 5, which the CHECK lines show as cmpnltsd. |
| define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpnlt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpnltsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpnlt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpnltsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5) |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpord_pd: `fcmp ord` on <2 x double>; must be selected as cmpordpd. |
| define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpord_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpordpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpord_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpordpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp ord <2 x double> %a0, %a1 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpord_sd: llvm.x86.sse2.cmp.sd with immediate 7, which the CHECK lines show as cmpordsd. |
| define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpord_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpordsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpord_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpordsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpunord_pd: `fcmp uno` on <2 x double>; must be selected as cmpunordpd. |
| define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpunord_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpunordpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpunord_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpunordpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %fcmp = fcmp uno <2 x double> %a0, %a1 |
| %sext = sext <2 x i1> %fcmp to <2 x i64> |
| %res = bitcast <2 x i64> %sext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; _mm_cmpunord_sd: llvm.x86.sse2.cmp.sd with immediate 3, which the CHECK lines show as cmpunordsd. |
| define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cmpunord_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cmpunordsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cmpunord_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cmpunordsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3) |
| ret <2 x double> %res |
| } |
| |
| ; _mm_comieq_sd: calls llvm.x86.sse2.comieq.sd. The expected code is comisd followed by setnp and |
| ; sete, whose results are combined with andb and zero-extended into %eax. |
| define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_comieq_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: comisd %xmm1, %xmm0 |
| ; X32-NEXT: setnp %al |
| ; X32-NEXT: sete %cl |
| ; X32-NEXT: andb %al, %cl |
| ; X32-NEXT: movzbl %cl, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_comieq_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: comisd %xmm1, %xmm0 |
| ; X64-NEXT: setnp %al |
| ; X64-NEXT: sete %cl |
| ; X64-NEXT: andb %al, %cl |
| ; X64-NEXT: movzbl %cl, %eax |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Greater-or-equal scalar compare via @llvm.x86.sse2.comige.sd, returning i32. |
| ; Expected code zeroes %eax, runs comisd with %a1 as the first printed operand, then setae. |
| define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_comige_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: comisd %xmm1, %xmm0 |
| ; X32-NEXT: setae %al |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_comige_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: comisd %xmm1, %xmm0 |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Greater-than scalar compare via @llvm.x86.sse2.comigt.sd, returning i32. |
| ; Expected code zeroes %eax, runs comisd with %a1 as the first printed operand, then seta. |
| define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_comigt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: comisd %xmm1, %xmm0 |
| ; X32-NEXT: seta %al |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_comigt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: comisd %xmm1, %xmm0 |
| ; X64-NEXT: seta %al |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Less-or-equal scalar compare via @llvm.x86.sse2.comile.sd, returning i32. |
| ; Expected code uses the same setae as the comige test but with the comisd |
| ; operands swapped (%xmm0, %xmm1). |
| define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_comile_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: comisd %xmm0, %xmm1 |
| ; X32-NEXT: setae %al |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_comile_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: comisd %xmm0, %xmm1 |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Less-than scalar compare via @llvm.x86.sse2.comilt.sd, returning i32. |
| ; Expected code uses the same seta as the comigt test but with the comisd |
| ; operands swapped (%xmm0, %xmm1). |
| define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_comilt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: comisd %xmm0, %xmm1 |
| ; X32-NEXT: seta %al |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_comilt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: comisd %xmm0, %xmm1 |
| ; X64-NEXT: seta %al |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Not-equal scalar compare via @llvm.x86.sse2.comineq.sd, returning i32. |
| ; Expected code is comisd, then setp and setne combined with orb and zero-extended |
| ; into %eax (presumably setp makes the unordered/NaN outcome count as not-equal - confirm). |
| define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_comineq_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: comisd %xmm1, %xmm0 |
| ; X32-NEXT: setp %al |
| ; X32-NEXT: setne %cl |
| ; X32-NEXT: orb %al, %cl |
| ; X32-NEXT: movzbl %cl, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_comineq_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: comisd %xmm1, %xmm0 |
| ; X64-NEXT: setp %al |
| ; X64-NEXT: setne %cl |
| ; X64-NEXT: orb %al, %cl |
| ; X64-NEXT: movzbl %cl, %eax |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Converts the two low i32 lanes to double using generic IR - shufflevector picks |
| ; lanes 0 and 1, then sitofp. Expected to select cvtdq2pd. |
| define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtepi32_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtdq2pd %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtepi32_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtdq2pd %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1> |
| %res = sitofp <2 x i32> %ext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; Converts four i32 lanes to float through @llvm.x86.sse2.cvtdq2ps after bitcasting |
| ; the <2 x i64> argument to <4 x i32>. Expected to select cvtdq2ps. |
| define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtepi32_ps: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtdq2ps %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtepi32_ps: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtdq2ps %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0) |
| ret <4 x float> %res |
| } |
| declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone |
| |
| ; Converts <2 x double> to <4 x i32> through @llvm.x86.sse2.cvtpd2dq and returns the |
| ; result bitcast to <2 x i64>. Expected to select cvtpd2dq. |
| define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtpd_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtpd2dq %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtpd_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtpd2dq %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone |
| |
| ; Converts <2 x double> to <4 x float> through @llvm.x86.sse2.cvtpd2ps. |
| ; Expected to select cvtpd2ps. |
| define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtpd_ps: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtpd2ps %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtpd_ps: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtpd2ps %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) |
| ret <4 x float> %res |
| } |
| declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone |
| |
| ; Converts <4 x float> to <4 x i32> through @llvm.x86.sse2.cvtps2dq and returns the |
| ; result bitcast to <2 x i64>. Expected to select cvtps2dq. |
| define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtps_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtps2dq %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtps_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtps2dq %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone |
| |
| ; Widens the two low float lanes to double using generic IR - shufflevector picks |
| ; lanes 0 and 1, then fpext. Expected to select cvtps2pd. |
| define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtps_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtps2pd %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtps_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtps2pd %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1> |
| %res = fpext <2 x float> %ext to <2 x double> |
| ret <2 x double> %res |
| } |
| |
| ; Returns lane 0 of the <2 x double> argument as a scalar double. |
| ; The i386 expectation stores the low lane to an 8-byte-aligned stack slot and reloads |
| ; it with fldl (presumably because the double is returned on the x87 stack - confirm); |
| ; the x86-64 expectation is a bare retq. |
| define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtsd_f64: |
| ; X32: # BB#0: |
| ; X32-NEXT: pushl %ebp |
| ; X32-NEXT: movl %esp, %ebp |
| ; X32-NEXT: andl $-8, %esp |
| ; X32-NEXT: subl $8, %esp |
| ; X32-NEXT: movlps %xmm0, (%esp) |
| ; X32-NEXT: fldl (%esp) |
| ; X32-NEXT: movl %ebp, %esp |
| ; X32-NEXT: popl %ebp |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtsd_f64: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| %res = extractelement <2 x double> %a0, i32 0 |
| ret double %res |
| } |
| |
| ; Converts the scalar double to i32 through @llvm.x86.sse2.cvtsd2si. |
| ; Expected to select cvtsd2si with the result in %eax. |
| define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtsd_si32: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtsd2si %xmm0, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtsd_si32: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtsd2si %xmm0, %eax |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone |
| |
| ; Scalar double-to-float conversion through @llvm.x86.sse2.cvtsd2ss with both operands |
| ; in registers. Expected to select cvtsd2ss. Marked nounwind like the neighbouring |
| ; tests so no unwind info is requested for it. |
| define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_cvtsd_ss: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtsd2ss %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtsd_ss: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtsd2ss %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) |
| ret <4 x float> %res |
| } |
| declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone |
| |
| ; Same conversion as the register form of the cvtsd2ss test, but the <2 x double> |
| ; operand is loaded from %p1 first. The load is expected to fold into cvtsd2ss as a |
| ; memory operand. Marked nounwind like the neighbouring tests. |
| define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) nounwind { |
| ; X32-LABEL: test_mm_cvtsd_ss_load: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: cvtsd2ss (%eax), %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtsd_ss_load: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtsd2ss (%rdi), %xmm0 |
| ; X64-NEXT: retq |
| %a1 = load <2 x double>, <2 x double>* %p1 |
| %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) |
| ret <4 x float> %res |
| } |
| |
| ; Returns i32 lane 0 of the argument (bitcast to <4 x i32>, then extractelement). |
| ; Expected to select a movd from %xmm0 to %eax. |
| define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtsi128_si32: |
| ; X32: # BB#0: |
| ; X32-NEXT: movd %xmm0, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtsi128_si32: |
| ; X64: # BB#0: |
| ; X64-NEXT: movd %xmm0, %eax |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %res = extractelement <4 x i32> %arg0, i32 0 |
| ret i32 %res |
| } |
| |
| ; Converts the i32 argument to double (sitofp) and inserts it into lane 0 of %a0. |
| ; Expected to select cvtsi2sdl, reading the integer from the stack on i386 and |
| ; from %edi on x86-64. |
| define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind { |
| ; X32-LABEL: test_mm_cvtsi32_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtsi32_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtsi2sdl %edi, %xmm0 |
| ; X64-NEXT: retq |
| %cvt = sitofp i32 %a1 to double |
| %res = insertelement <2 x double> %a0, double %cvt, i32 0 |
| ret <2 x double> %res |
| } |
| |
| ; Builds a <4 x i32> with %a0 in lane 0 and zero in lanes 1-3, returned as <2 x i64>. |
| ; The i386 expectation is a zero-extending movss load from memory; the x86-64 |
| ; expectation is movd from %edi. |
| define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind { |
| ; X32-LABEL: test_mm_cvtsi32_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtsi32_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: movd %edi, %xmm0 |
| ; X64-NEXT: retq |
| %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 |
| %res1 = insertelement <4 x i32> %res0, i32 0, i32 1 |
| %res2 = insertelement <4 x i32> %res1, i32 0, i32 2 |
| %res3 = insertelement <4 x i32> %res2, i32 0, i32 3 |
| %res = bitcast <4 x i32> %res3 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; Extends float lane 0 of %a1 to double (fpext) and inserts it into lane 0 of %a0. |
| ; Expected to select cvtss2sd. |
| define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind { |
| ; X32-LABEL: test_mm_cvtss_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvtss2sd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvtss_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvtss2sd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %ext = extractelement <4 x float> %a1, i32 0 |
| %cvt = fpext float %ext to double |
| %res = insertelement <2 x double> %a0, double %cvt, i32 0 |
| ret <2 x double> %res |
| } |
| |
| ; Converts <2 x double> to <4 x i32> through @llvm.x86.sse2.cvttpd2dq and returns the |
| ; result bitcast to <2 x i64>. Expected to select cvttpd2dq. |
| define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvttpd_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvttpd2dq %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvttpd_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvttpd2dq %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone |
| |
| ; Converts <4 x float> to <4 x i32> through @llvm.x86.sse2.cvttps2dq and returns the |
| ; result bitcast to <2 x i64>. Expected to select cvttps2dq. |
| define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvttps_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvttps2dq %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvttps_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvttps2dq %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone |
| |
| ; Converts the scalar double to i32 through @llvm.x86.sse2.cvttsd2si. |
| ; Expected to select cvttsd2si with the result in %eax. |
| define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_cvttsd_si32: |
| ; X32: # BB#0: |
| ; X32-NEXT: cvttsd2si %xmm0, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_cvttsd_si32: |
| ; X64: # BB#0: |
| ; X64-NEXT: cvttsd2si %xmm0, %eax |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone |
| |
| ; Packed double division written as a plain vector fdiv. Expected to select divpd. |
| define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_div_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: divpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_div_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: divpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = fdiv <2 x double> %a0, %a1 |
| ret <2 x double> %res |
| } |
| |
| ; Scalar double division - extracts lane 0 of both operands, divides, and inserts the |
| ; quotient back into lane 0 of %a0. Expected to collapse into a single divsd. |
| define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_div_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: divsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_div_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: divsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %ext0 = extractelement <2 x double> %a0, i32 0 |
| %ext1 = extractelement <2 x double> %a1, i32 0 |
| %fdiv = fdiv double %ext0, %ext1 |
| %res = insertelement <2 x double> %a0, double %fdiv, i32 0 |
| ret <2 x double> %res |
| } |
| |
| ; Extracts i16 lane 1 and zero-extends it to i32. |
| ; Expected code is pextrw with immediate 1 followed by movzwl. |
| define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_extract_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pextrw $1, %xmm0, %eax |
| ; X32-NEXT: movzwl %ax, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_extract_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pextrw $1, %xmm0, %eax |
| ; X64-NEXT: movzwl %ax, %eax |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %ext = extractelement <8 x i16> %arg0, i32 1 |
| %res = zext i16 %ext to i32 |
| ret i32 %res |
| } |
| |
| ; Inserts the i16 argument into lane 1 of the vector (viewed as <8 x i16>). |
| ; Expected to select pinsrw with immediate 1; the i386 run first loads the value |
| ; from the stack into %ax, the x86-64 run uses %edi directly. |
| define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind { |
| ; X32-LABEL: test_mm_insert_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: pinsrw $1, %eax, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_insert_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pinsrw $1, %edi, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1 |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Emits a load fence through @llvm.x86.sse2.lfence. Expected to select lfence. |
| ; The declaration deliberately omits readnone - a fence exists only for its ordering |
| ; side effect, so describing it as free of memory effects would be misleading. |
| define void @test_mm_lfence() nounwind { |
| ; X32-LABEL: test_mm_lfence: |
| ; X32: # BB#0: |
| ; X32-NEXT: lfence |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_lfence: |
| ; X64: # BB#0: |
| ; X64-NEXT: lfence |
| ; X64-NEXT: retq |
| call void @llvm.x86.sse2.lfence() |
| ret void |
| } |
| declare void @llvm.x86.sse2.lfence() nounwind |
| |
| ; 16-byte-aligned load of <2 x double> through a bitcast double pointer. |
| ; Expected to select movaps. |
| define <2 x double> @test_mm_load_pd(double* %a0) nounwind { |
| ; X32-LABEL: test_mm_load_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movaps (%eax), %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_load_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movaps (%rdi), %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast double* %a0 to <2 x double>* |
| %res = load <2 x double>, <2 x double>* %arg0, align 16 |
| ret <2 x double> %res |
| } |
| |
| ; Loads one double (align 1) into lane 0 and sets lane 1 to 0.0. |
| ; Expected to select a single zero-extending movsd load. |
| define <2 x double> @test_mm_load_sd(double* %a0) nounwind { |
| ; X32-LABEL: test_mm_load_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_load_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X64-NEXT: retq |
| %ld = load double, double* %a0, align 1 |
| %res0 = insertelement <2 x double> undef, double %ld, i32 0 |
| %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| ; 16-byte-aligned load of <2 x i64>. Expected to select movaps. |
| define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind { |
| ; X32-LABEL: test_mm_load_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movaps (%eax), %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_load_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: movaps (%rdi), %xmm0 |
| ; X64-NEXT: retq |
| %res = load <2 x i64>, <2 x i64>* %a0, align 16 |
| ret <2 x i64> %res |
| } |
| |
| ; Loads one double (align 8) and places it in both lanes of the result. |
| ; Expected code is a movsd load followed by movlhps duplicating lane 0. |
| define <2 x double> @test_mm_load1_pd(double* %a0) nounwind { |
| ; X32-LABEL: test_mm_load1_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_load1_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X64-NEXT: retq |
| %ld = load double, double* %a0, align 8 |
| %res0 = insertelement <2 x double> undef, double %ld, i32 0 |
| %res1 = insertelement <2 x double> %res0, double %ld, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| ; Loads one double (align 8) into lane 1 of %a0, leaving lane 0 unchanged. |
| ; Expected to select movhpd with a memory operand. |
| define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind { |
| ; X32-LABEL: test_mm_loadh_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_loadh_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] |
| ; X64-NEXT: retq |
| %ld = load double, double* %a1, align 8 |
| %res = insertelement <2 x double> %a0, double %ld, i32 1 |
| ret <2 x double> %res |
| } |
| |
| ; Loads one i64 (align 1) from %a1 into lane 0 and sets lane 1 to zero. |
| ; Expected to select a zero-extending movsd load. |
| ; NOTE(review): %a0 is never referenced in the body - confirm whether it is |
| ; intentionally kept in the signature. |
| define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind { |
| ; X32-LABEL: test_mm_loadl_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_loadl_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X64-NEXT: retq |
| %bc = bitcast <2 x i64>* %a1 to i64* |
| %ld = load i64, i64* %bc, align 1 |
| %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0 |
| %res1 = insertelement <2 x i64> %res0, i64 0, i32 1 |
| ret <2 x i64> %res1 |
| } |
| |
| ; Loads one double (align 8) into lane 0 of %a0, leaving lane 1 unchanged. |
| ; Expected to select movlpd with a memory operand. |
| define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind { |
| ; X32-LABEL: test_mm_loadl_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_loadl_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] |
| ; X64-NEXT: retq |
| %ld = load double, double* %a1, align 8 |
| %res = insertelement <2 x double> %a0, double %ld, i32 0 |
| ret <2 x double> %res |
| } |
| |
| ; 16-byte-aligned load of <2 x double> followed by a shufflevector that swaps the |
| ; two lanes. Expected code is movapd followed by shufpd producing lanes [1,0]. |
| define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind { |
| ; X32-LABEL: test_mm_loadr_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movapd (%eax), %xmm0 |
| ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_loadr_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movapd (%rdi), %xmm0 |
| ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] |
| ; X64-NEXT: retq |
| %arg0 = bitcast double* %a0 to <2 x double>* |
| %ld = load <2 x double>, <2 x double>* %arg0, align 16 |
| %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0> |
| ret <2 x double> %res |
| } |
| |
| ; Unaligned (align 1) load of <2 x double> through a bitcast double pointer. |
| ; Expected to select movups. |
| define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind { |
| ; X32-LABEL: test_mm_loadu_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movups (%eax), %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_loadu_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movups (%rdi), %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast double* %a0 to <2 x double>* |
| %res = load <2 x double>, <2 x double>* %arg0, align 1 |
| ret <2 x double> %res |
| } |
| |
| ; Unaligned (align 1) load of <2 x i64>. Expected to select movups. |
| define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind { |
| ; X32-LABEL: test_mm_loadu_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movups (%eax), %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_loadu_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: movups (%rdi), %xmm0 |
| ; X64-NEXT: retq |
| %res = load <2 x i64>, <2 x i64>* %a0, align 1 |
| ret <2 x i64> %res |
| } |
| |
| ; Calls @llvm.x86.sse2.pmadd.wd on the operands viewed as <8 x i16>; the <4 x i32> |
| ; result is returned as <2 x i64>. Expected to select pmaddwd. |
| define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_madd_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pmaddwd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_madd_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pmaddwd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.maskmov.dqu with both vectors viewed as <16 x i8> and the |
| ; pointer %a2. Expected to select maskmovdqu. The i386 expectation saves %edi, loads |
| ; the pointer argument into it and restores it afterwards; the x86-64 expectation is |
| ; the instruction alone (presumably %a2 already arrives in %rdi - confirm). |
| define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind { |
| ; X32-LABEL: test_mm_maskmoveu_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X32-NEXT: maskmovdqu %xmm1, %xmm0 |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_maskmoveu_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: maskmovdqu %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2) |
| ret void |
| } |
| declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind |
| |
| ; Signed i16 maximum written as icmp sgt + select on <8 x i16>. |
| ; Expected to be recognised and selected as pmaxsw. |
| define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_max_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pmaxsw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_max_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pmaxsw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %cmp = icmp sgt <8 x i16> %arg0, %arg1 |
| %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1 |
| %bc = bitcast <8 x i16> %sel to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Unsigned i8 maximum written as icmp ugt + select on <16 x i8>. |
| ; Expected to be recognised and selected as pmaxub. |
| define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_max_epu8: |
| ; X32: # BB#0: |
| ; X32-NEXT: pmaxub %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_max_epu8: |
| ; X64: # BB#0: |
| ; X64-NEXT: pmaxub %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %cmp = icmp ugt <16 x i8> %arg0, %arg1 |
| %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1 |
| %bc = bitcast <16 x i8> %sel to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Packed double maximum through @llvm.x86.sse2.max.pd. Expected to select maxpd. |
| define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_max_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: maxpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_max_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: maxpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) |
| ret <2 x double> %res |
| } |
| declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Scalar double maximum through @llvm.x86.sse2.max.sd. Expected to select maxsd. |
| define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_max_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: maxsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_max_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: maxsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) |
| ret <2 x double> %res |
| } |
| declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Emits a full memory fence through @llvm.x86.sse2.mfence. Expected to select mfence. |
| ; The declaration deliberately omits readnone - a fence exists only for its ordering |
| ; side effect, so describing it as free of memory effects would be misleading. |
| define void @test_mm_mfence() nounwind { |
| ; X32-LABEL: test_mm_mfence: |
| ; X32: # BB#0: |
| ; X32-NEXT: mfence |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_mfence: |
| ; X64: # BB#0: |
| ; X64-NEXT: mfence |
| ; X64-NEXT: retq |
| call void @llvm.x86.sse2.mfence() |
| ret void |
| } |
| declare void @llvm.x86.sse2.mfence() nounwind |
| |
| ; Signed i16 minimum written as icmp slt + select on <8 x i16>. |
| ; Expected to be recognised and selected as pminsw. |
| define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_min_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pminsw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_min_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pminsw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %cmp = icmp slt <8 x i16> %arg0, %arg1 |
| %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1 |
| %bc = bitcast <8 x i16> %sel to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Unsigned i8 minimum written as icmp ult + select on <16 x i8>. |
| ; Expected to be recognised and selected as pminub. |
| define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_min_epu8: |
| ; X32: # BB#0: |
| ; X32-NEXT: pminub %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_min_epu8: |
| ; X64: # BB#0: |
| ; X64-NEXT: pminub %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %cmp = icmp ult <16 x i8> %arg0, %arg1 |
| %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1 |
| %bc = bitcast <16 x i8> %sel to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Packed double minimum through @llvm.x86.sse2.min.pd. Expected to select minpd. |
| define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_min_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: minpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_min_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: minpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) |
| ret <2 x double> %res |
| } |
| declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Scalar double minimum through @llvm.x86.sse2.min.sd. Expected to select minsd. |
| define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_min_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: minsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_min_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: minsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) |
| ret <2 x double> %res |
| } |
| declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Keeps i64 lane 0 of %a0 and zeroes lane 1, via a shufflevector against |
| ; zeroinitializer. Expected to select a register-to-register movq. |
| define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_move_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_move_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero |
| ; X64-NEXT: retq |
| %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2> |
| ret <2 x i64> %res |
| } |
| |
| ; Builds a vector from lane 0 of %a1 and lane 1 of %a0 with extract/insert pairs. |
| ; Expected to collapse into a single register movsd. |
| define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_move_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_move_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] |
| ; X64-NEXT: retq |
| %ext0 = extractelement <2 x double> %a1, i32 0 |
| %res0 = insertelement <2 x double> undef, double %ext0, i32 0 |
| %ext1 = extractelement <2 x double> %a0, i32 1 |
| %res1 = insertelement <2 x double> %res0, double %ext1, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| ; Calls @llvm.x86.sse2.pmovmskb.128 on the argument viewed as <16 x i8>. |
| ; Expected to select pmovmskb with the result in %eax. |
| define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_movemask_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: pmovmskb %xmm0, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_movemask_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: pmovmskb %xmm0, %eax |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.movmsk.pd on the <2 x double> argument. |
| ; Expected to select movmskpd with the result in %eax. |
| define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_movemask_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movmskpd %xmm0, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_movemask_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movmskpd %xmm0, %eax |
| ; X64-NEXT: retq |
| %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) |
| ret i32 %res |
| } |
| declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.pmulu.dq on the operands viewed as <4 x i32>, returning |
| ; <2 x i64>. Expected to select pmuludq. Marked nounwind like the neighbouring tests. |
| define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_mul_epu32: |
| ; X32: # BB#0: |
| ; X32-NEXT: pmuludq %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_mul_epu32: |
| ; X64: # BB#0: |
| ; X64-NEXT: pmuludq %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1) |
| ret <2 x i64> %res |
| } |
| declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone |
| |
| ; Packed double multiplication written as a plain vector fmul. Expected to select mulpd. |
| define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_mul_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: mulpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_mul_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: mulpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = fmul <2 x double> %a0, %a1 |
| ret <2 x double> %res |
| } |
| |
| ; Scalar double multiplication - extracts lane 0 of both operands, multiplies, and |
| ; inserts the product back into lane 0 of %a0. Expected to collapse into a single mulsd. |
| define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_mul_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: mulsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_mul_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: mulsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %ext0 = extractelement <2 x double> %a0, i32 0 |
| %ext1 = extractelement <2 x double> %a1, i32 0 |
| %fmul = fmul double %ext0, %ext1 |
| %res = insertelement <2 x double> %a0, double %fmul, i32 0 |
| ret <2 x double> %res |
| } |
| |
| ; Calls @llvm.x86.sse2.pmulh.w on the operands viewed as <8 x i16>. |
| ; Expected to select pmulhw. Marked nounwind like the neighbouring tests. |
| define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_mulhi_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pmulhw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_mulhi_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pmulhw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.pmulhu.w on the operands viewed as <8 x i16>. |
| ; Expected to select pmulhuw. Marked nounwind like the neighbouring tests. |
| define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_mulhi_epu16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pmulhuw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_mulhi_epu16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pmulhuw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; i16 multiplication written as a plain vector mul on <8 x i16>. |
| ; Expected to select pmullw. Marked nounwind like the neighbouring tests. |
| define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_mullo_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pmullw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_mullo_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pmullw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = mul <8 x i16> %arg0, %arg1 |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Bitwise OR of two <2 x double> values, performed on their <4 x i32> bit patterns. |
| ; The expected instruction is orps. |
| define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_or_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: orps %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_or_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: orps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x double> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x double> %a1 to <4 x i32> |
| %res = or <4 x i32> %arg0, %arg1 |
| %bc = bitcast <4 x i32> %res to <2 x double> |
| ret <2 x double> %bc |
| } |
| |
| ; _mm_or_si128: a plain IR or on the two <2 x i64> arguments, with no bitcasts. |
| ; The checks expect a single orps on both the i386 and x86_64 runs. |
| define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_or_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: orps %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_or_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: orps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = or <2 x i64> %a0, %a1 |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_packs_epi16: both <2 x i64> operands are reinterpreted as <8 x i16> and |
| ; passed to the llvm.x86.sse2.packsswb.128 intrinsic; the <16 x i8> result is |
| ; bitcast back to <2 x i64>. Both RUN lines must select a single packsswb. |
| ; The function is marked nounwind to match the other tests in this file. |
| define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_packs_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: packsswb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_packs_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: packsswb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; _mm_packs_epi32: both <2 x i64> operands are reinterpreted as <4 x i32> and |
| ; passed to the llvm.x86.sse2.packssdw.128 intrinsic; the <8 x i16> result is |
| ; bitcast back to <2 x i64>. Both RUN lines must select a single packssdw. |
| ; The function is marked nounwind to match the other tests in this file. |
| define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_packs_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: packssdw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_packs_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: packssdw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone |
| |
| ; _mm_packus_epi16: both <2 x i64> operands are reinterpreted as <8 x i16> and |
| ; passed to the llvm.x86.sse2.packuswb.128 intrinsic; the <16 x i8> result is |
| ; bitcast back to <2 x i64>. Both RUN lines must select a single packuswb. |
| ; The function is marked nounwind to match the other tests in this file. |
| define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_packus_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: packuswb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_packus_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: packuswb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; _mm_pause: calls the llvm.x86.sse2.pause intrinsic, which takes no arguments |
| ; and returns nothing. Both RUN lines must emit a pause instruction followed by |
| ; the return. |
| define void @test_mm_pause() nounwind { |
| ; X32-LABEL: test_mm_pause: |
| ; X32: # BB#0: |
| ; X32-NEXT: pause |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_pause: |
| ; X64: # BB#0: |
| ; X64-NEXT: pause |
| ; X64-NEXT: retq |
| call void @llvm.x86.sse2.pause() |
| ret void |
| } |
| ; Not declared readnone: a readnone call whose (void) result is unused may be |
| ; treated as dead and removed, which would defeat the purpose of this test. |
| declare void @llvm.x86.sse2.pause() nounwind |
| |
| ; _mm_sad_epu8: both <2 x i64> operands are reinterpreted as <16 x i8> and |
| ; passed to the llvm.x86.sse2.psad.bw intrinsic, which already returns |
| ; <2 x i64>, so no bitcast of the result is needed. The checks expect a single |
| ; psadbw on both the i386 and x86_64 runs. |
| define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_sad_epu8: |
| ; X32: # BB#0: |
| ; X32-NEXT: psadbw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sad_epu8: |
| ; X64: # BB#0: |
| ; X64-NEXT: psadbw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1) |
| ret <2 x i64> %res |
| } |
| declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; _mm_set_epi8: builds a <16 x i8> from sixteen i8 arguments in reverse order |
| ; (%a15 goes to element 0, %a0 to element 15) and returns it as <2 x i64>. |
| ; The checks expect every byte to be zero-extended with movzbl, moved into an |
| ; xmm register with movd, and merged pairwise through punpcklbw, punpcklwd, |
| ; punpckldq and a final punpcklqdq. On the i386 run every argument is loaded |
| ; from the stack; on the x86_64 run the first six come from registers and the |
| ; rest from the stack. |
| define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { |
| ; X32-LABEL: test_mm_set_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm1 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm2 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm3 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm1 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] |
| ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm2 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm3 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm2 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm4 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: movzbl %dil, %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl %sil, %eax |
| ; X64-NEXT: movd %eax, %xmm1 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] |
| ; X64-NEXT: movzbl %dl, %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl %cl, %eax |
| ; X64-NEXT: movd %eax, %xmm2 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] |
| ; X64-NEXT: movzbl %r8b, %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl %r9b, %eax |
| ; X64-NEXT: movd %eax, %xmm3 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm1 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm2 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm3 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm2 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm4 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| ; Arguments are inserted in reverse: the last argument lands in element 0. |
| %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 |
| %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 |
| %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2 |
| %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3 |
| %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4 |
| %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5 |
| %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6 |
| %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7 |
| %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8 |
| %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9 |
| %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10 |
| %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11 |
| %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12 |
| %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13 |
| %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14 |
| %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15 |
| %res = bitcast <16 x i8> %res15 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_set_epi16: builds an <8 x i16> from eight i16 arguments in reverse order |
| ; (%a7 goes to element 0, %a0 to element 7) and returns it as <2 x i64>. |
| ; The checks expect each word to be moved into an xmm register with movd and |
| ; then merged through punpcklwd, punpckldq and a final punpcklqdq. On the i386 |
| ; run every argument is loaded from the stack with movw; on the x86_64 run only |
| ; two arguments come from the stack. |
| define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { |
| ; X32-LABEL: test_mm_set_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm1 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm2 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm3 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm4 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm5 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm6 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm7 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] |
| ; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w |
| ; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax |
| ; X64-NEXT: movd %edi, %xmm0 |
| ; X64-NEXT: movd %esi, %xmm1 |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] |
| ; X64-NEXT: movd %edx, %xmm0 |
| ; X64-NEXT: movd %ecx, %xmm2 |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] |
| ; X64-NEXT: movd %r8d, %xmm0 |
| ; X64-NEXT: movd %r9d, %xmm1 |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] |
| ; X64-NEXT: movd %eax, %xmm3 |
| ; X64-NEXT: movd %r10d, %xmm0 |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] |
| ; X64-NEXT: retq |
| ; Arguments are inserted in reverse: the last argument lands in element 0. |
| %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 |
| %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 |
| %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2 |
| %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3 |
| %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4 |
| %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5 |
| %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6 |
| %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 |
| %res = bitcast <8 x i16> %res7 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_set_epi32: builds a <4 x i32> from four i32 arguments in reverse order |
| ; (%a3 goes to element 0, %a0 to element 3) and returns it as <2 x i64>. |
| ; The checks expect four movd loads/moves merged by two punpckldq and a final |
| ; punpcklqdq; the i386 run reads the arguments from memory, the x86_64 run |
| ; reads them from edi/esi/edx/ecx. |
| define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { |
| ; X32-LABEL: test_mm_set_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] |
| ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: movd %edi, %xmm0 |
| ; X64-NEXT: movd %esi, %xmm1 |
| ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] |
| ; X64-NEXT: movd %edx, %xmm2 |
| ; X64-NEXT: movd %ecx, %xmm0 |
| ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| ; Arguments are inserted in reverse: the last argument lands in element 0. |
| %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0 |
| %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1 |
| %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2 |
| %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3 |
| %res = bitcast <4 x i32> %res3 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; TODO: add test_mm_set_epi64 (only the epi64x variant is tested below). |
| |
| ; _mm_set_epi64x: builds a <2 x i64> with %a1 in element 0 and %a0 in |
| ; element 1. On the i386 run the checks expect four 32-bit movd loads combined |
| ; with punpckldq and punpcklqdq; on the x86_64 run they expect two movq moves |
| ; from rdi/rsi followed by a single punpcklqdq. |
| define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind { |
| ; X32-LABEL: test_mm_set_epi64x: |
| ; X32: # BB#0: |
| ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set_epi64x: |
| ; X64: # BB#0: |
| ; X64-NEXT: movq %rdi, %xmm1 |
| ; X64-NEXT: movq %rsi, %xmm0 |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0 |
| %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 |
| ret <2 x i64> %res1 |
| } |
| |
| ; _mm_set_pd: builds a <2 x double> with %a1 in element 0 and %a0 in |
| ; element 1. The i386 run is expected to load both doubles with movsd and |
| ; combine them with unpcklpd; the x86_64 run is expected to unpcklpd into xmm1 |
| ; and then copy the result to xmm0 with movapd. |
| define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind { |
| ; X32-LABEL: test_mm_set_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero |
| ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] |
| ; X64-NEXT: movapd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res0 = insertelement <2 x double> undef, double %a1, i32 0 |
| %res1 = insertelement <2 x double> %res0, double %a0, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| ; _mm_set_pd1: inserts the same double %a0 into both elements of a |
| ; <2 x double>. The checks expect a movlhps that duplicates element 0; the i386 |
| ; run first loads the argument from memory with movsd. |
| define <2 x double> @test_mm_set_pd1(double %a0) nounwind { |
| ; X32-LABEL: test_mm_set_pd1: |
| ; X32: # BB#0: |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set_pd1: |
| ; X64: # BB#0: |
| ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X64-NEXT: retq |
| %res0 = insertelement <2 x double> undef, double %a0, i32 0 |
| %res1 = insertelement <2 x double> %res0, double %a0, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| ; _mm_set_sd: builds a <2 x double> with %a0 in element 0 and the constant 0.0 |
| ; in element 1. The checks expect a movq that keeps element 0 and zeroes the |
| ; upper element; the i386 run additionally loads the argument from memory with |
| ; a preceding movq. |
| define <2 x double> @test_mm_set_sd(double %a0) nounwind { |
| ; X32-LABEL: test_mm_set_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero |
| ; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero |
| ; X64-NEXT: retq |
| %res0 = insertelement <2 x double> undef, double %a0, i32 0 |
| %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| ; _mm_set1_epi8: inserts the same i8 %a0 into each of the sixteen elements of |
| ; a <16 x i8> and returns it as <2 x i64>. The checks expect the byte to be |
| ; zero-extended (movzbl), moved to xmm0 (movd) and then broadcast with |
| ; punpcklbw, pshuflw and pshufd on both targets. |
| define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind { |
| ; X32-LABEL: test_mm_set1_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
| ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] |
| ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set1_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: movzbl %dil, %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
| ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] |
| ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] |
| ; X64-NEXT: retq |
| %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0 |
| %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1 |
| %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2 |
| %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3 |
| %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4 |
| %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5 |
| %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6 |
| %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7 |
| %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8 |
| %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9 |
| %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10 |
| %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11 |
| %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12 |
| %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13 |
| %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14 |
| %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15 |
| %res = bitcast <16 x i8> %res15 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_set1_epi16: inserts the same i16 %a0 into each of the eight elements of |
| ; an <8 x i16> and returns it as <2 x i64>. The checks expect a movd into xmm0 |
| ; followed by a pshuflw + pshufd broadcast; the i386 run first loads the |
| ; argument from the stack with movw. |
| define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind { |
| ; X32-LABEL: test_mm_set1_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] |
| ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set1_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: movd %edi, %xmm0 |
| ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] |
| ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] |
| ; X64-NEXT: retq |
| %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 |
| %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1 |
| %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2 |
| %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3 |
| %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4 |
| %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5 |
| %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6 |
| %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 |
| %res = bitcast <8 x i16> %res7 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; _mm_set1_epi32: inserts the same i32 %a0 into each of the four elements of a |
| ; <4 x i32> and returns it as <2 x i64>. The checks expect a movd into xmm0 |
| ; followed by a pshufd that broadcasts element 0 on both targets. |
| define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind { |
| ; X32-LABEL: test_mm_set1_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set1_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: movd %edi, %xmm0 |
| ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] |
| ; X64-NEXT: retq |
| %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 |
| %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1 |
| %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2 |
| %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3 |
| %res = bitcast <4 x i32> %res3 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; TODO: add test_mm_set1_epi64 (only the epi64x variant is tested below). |
| |
| ; _mm_set1_epi64x: inserts the same i64 %a0 into both elements of a <2 x i64>. |
| ; On the i386 run the checks expect two 32-bit movd loads joined by punpckldq |
| ; and then a pshufd that duplicates the low 64 bits; on the x86_64 run they |
| ; expect a movq from rdi followed by the same pshufd. |
| define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind { |
| ; X32-LABEL: test_mm_set1_epi64x: |
| ; X32: # BB#0: |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set1_epi64x: |
| ; X64: # BB#0: |
| ; X64-NEXT: movq %rdi, %xmm0 |
| ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] |
| ; X64-NEXT: retq |
| %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 |
| %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 |
| ret <2 x i64> %res1 |
| } |
| |
| ; _mm_set1_pd: inserts the same double %a0 into both elements of a |
| ; <2 x double>. The checks expect a movlhps that duplicates element 0; the i386 |
| ; run first loads the argument from memory with movsd. |
| define <2 x double> @test_mm_set1_pd(double %a0) nounwind { |
| ; X32-LABEL: test_mm_set1_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_set1_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X64-NEXT: retq |
| %res0 = insertelement <2 x double> undef, double %a0, i32 0 |
| %res1 = insertelement <2 x double> %res0, double %a0, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| ; _mm_setr_epi8: builds a <16 x i8> from sixteen i8 arguments in argument |
| ; order (%a0 goes to element 0, %a15 to element 15) and returns it as |
| ; <2 x i64>. The checks expect every byte to be zero-extended with movzbl, |
| ; moved into an xmm register with movd, and merged pairwise through punpcklbw, |
| ; punpcklwd, punpckldq and a final punpcklqdq. On the i386 run every argument |
| ; is loaded from the stack; on the x86_64 run the stack arguments are consumed |
| ; first and the six register arguments last. |
| define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { |
| ; X32-LABEL: test_mm_setr_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm1 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm2 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm3 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm1 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] |
| ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm2 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm3 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm2 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm4 |
| ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_setr_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm1 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm2 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm3 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm1 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax |
| ; X64-NEXT: movd %eax, %xmm2 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X64-NEXT: movzbl %r9b, %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl %r8b, %eax |
| ; X64-NEXT: movd %eax, %xmm3 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] |
| ; X64-NEXT: movzbl %cl, %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movzbl %dl, %eax |
| ; X64-NEXT: movd %eax, %xmm2 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] |
| ; X64-NEXT: movzbl %sil, %eax |
| ; X64-NEXT: movd %eax, %xmm4 |
| ; X64-NEXT: movzbl %dil, %eax |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| ; Arguments are inserted in argument order: the first argument lands in element 0. |
| %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0 |
| %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1 |
| %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2 |
| %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3 |
| %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4 |
| %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5 |
| %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6 |
| %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7 |
| %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8 |
| %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9 |
| %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10 |
| %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11 |
| %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12 |
| %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13 |
| %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14 |
| %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15 |
| %res = bitcast <16 x i8> %res15 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; Builds an <8 x i16> from eight scalar i16 arguments via insertelement (%a0 in lane 0 through %a7 in lane 7) and returns it bitcast to <2 x i64>. |
| ; The assertions expect each scalar to be moved into an XMM register with movd and the lanes merged with punpcklwd, punpckldq and punpcklqdq. |
| define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { |
| ; X32-LABEL: test_mm_setr_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm1 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm2 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm3 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm4 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm5 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm6 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm7 |
| ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax |
| ; X32-NEXT: movd %eax, %xmm0 |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] |
| ; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_setr_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax |
| ; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w |
| ; X64-NEXT: movd %eax, %xmm0 |
| ; X64-NEXT: movd %r10d, %xmm1 |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] |
| ; X64-NEXT: movd %r9d, %xmm0 |
| ; X64-NEXT: movd %r8d, %xmm2 |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] |
| ; X64-NEXT: movd %ecx, %xmm0 |
| ; X64-NEXT: movd %edx, %xmm1 |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] |
| ; X64-NEXT: movd %esi, %xmm3 |
| ; X64-NEXT: movd %edi, %xmm0 |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] |
| ; X64-NEXT: retq |
| %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 |
| %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1 |
| %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2 |
| %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3 |
| %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4 |
| %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5 |
| %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6 |
| %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7 |
| %res = bitcast <8 x i16> %res7 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; Builds a <4 x i32> from four scalar i32 arguments via insertelement (%a0 in lane 0 through %a3 in lane 3) and returns it bitcast to <2 x i64>. |
| ; The assertions expect movd for each scalar followed by punpckldq/punpcklqdq merges. |
| define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { |
| ; X32-LABEL: test_mm_setr_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] |
| ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_setr_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: movd %ecx, %xmm0 |
| ; X64-NEXT: movd %edx, %xmm1 |
| ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] |
| ; X64-NEXT: movd %esi, %xmm2 |
| ; X64-NEXT: movd %edi, %xmm0 |
| ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 |
| %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1 |
| %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2 |
| %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3 |
| %res = bitcast <4 x i32> %res3 to <2 x i64> |
| ret <2 x i64> %res |
| } |
| |
| ; TODO: add test_mm_setr_epi64 |
| |
| ; Builds a <2 x i64> from two scalar i64 arguments via insertelement (%a0 in lane 0, %a1 in lane 1). |
| ; On x86_64 the assertions expect two movq transfers joined by punpcklqdq; on i386 they expect four 32-bit movd loads merged with punpckldq/punpcklqdq. |
| define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind { |
| ; X32-LABEL: test_mm_setr_epi64x: |
| ; X32: # BB#0: |
| ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] |
| ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_setr_epi64x: |
| ; X64: # BB#0: |
| ; X64-NEXT: movq %rsi, %xmm1 |
| ; X64-NEXT: movq %rdi, %xmm0 |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 |
| %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1 |
| ret <2 x i64> %res1 |
| } |
| |
| ; Builds a <2 x double> from two scalar doubles via insertelement (%a0 in lane 0, %a1 in lane 1). |
| ; The assertions expect a single unpcklpd on x86_64, preceded by two movsd loads on i386. |
| define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind { |
| ; X32-LABEL: test_mm_setr_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero |
| ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_setr_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| %res0 = insertelement <2 x double> undef, double %a0, i32 0 |
| %res1 = insertelement <2 x double> %res0, double %a1, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| ; Returns an all-zero <2 x double>; the assertions expect the zero to be materialized with xorps %xmm0, %xmm0. |
| define <2 x double> @test_mm_setzero_pd() { |
| ; X32-LABEL: test_mm_setzero_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorps %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_setzero_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorps %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| ret <2 x double> zeroinitializer |
| } |
| |
| ; Returns an all-zero <2 x i64>; the assertions expect the zero to be materialized with xorps %xmm0, %xmm0. |
| define <2 x i64> @test_mm_setzero_si128() { |
| ; X32-LABEL: test_mm_setzero_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorps %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_setzero_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorps %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| ret <2 x i64> zeroinitializer |
| } |
| |
| ; Splats lane 0 of the input (viewed as <4 x i32>) to all four lanes using a zeroinitializer shuffle mask. |
| ; The assertions expect a single pshufd with element order [0,0,0,0]. |
| define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_shuffle_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_shuffle_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Two-source double shuffle: mask <1, 2> selects lane 1 of %a0 and lane 0 of %a1. |
| ; The assertions expect a single shufpd producing xmm0[1],xmm1[0]. |
| define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_shuffle_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_shuffle_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] |
| ; X64-NEXT: retq |
| %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> |
| ret <2 x double> %res |
| } |
| |
| ; Shuffles only the upper four i16 lanes (each replaced by lane 4) while lanes 0-3 keep their positions. |
| ; The assertions expect a single pshufhw with element order [0,1,2,3,4,4,4,4]. |
| define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_shufflehi_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_shufflehi_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Shuffles only the lower four i16 lanes (each replaced by lane 0) while lanes 4-7 keep their positions. |
| ; The assertions expect a single pshuflw with element order [0,0,0,0,4,5,6,7]. |
| define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_shufflelo_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_shufflelo_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Calls @llvm.x86.sse2.psll.w on both operands viewed as <8 x i16>; the assertions expect the register-count form psllw %xmm1, %xmm0. |
| define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_sll_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psllw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sll_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psllw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psll.d on both operands viewed as <4 x i32>; the assertions expect the register-count form pslld %xmm1, %xmm0. |
| define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_sll_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: pslld %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sll_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: pslld %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psll.q directly on the <2 x i64> operands (no bitcasts needed); the assertions expect psllq %xmm1, %xmm0. |
| define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_sll_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: psllq %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sll_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: psllq %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) |
| ret <2 x i64> %res |
| } |
| declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.pslli.w with a constant count of 1; the assertions expect the immediate form psllw $1, %xmm0. |
| define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_slli_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psllw $1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_slli_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psllw $1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.pslli.d with a constant count of 1; the assertions expect the immediate form pslld $1, %xmm0. |
| define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_slli_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: pslld $1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_slli_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: pslld $1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.pslli.q with a constant count of 1; the assertions expect the immediate form psllq $1, %xmm0. |
| define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_slli_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: psllq $1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_slli_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: psllq $1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1) |
| ret <2 x i64> %res |
| } |
| declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone |
| |
| ; Whole-register byte shift left by 5, written as a shufflevector whose first operand is the zero vector. |
| ; Mask indices 11-15 pick five zero bytes and indices 16-26 pick bytes 0-10 of the input; the assertions expect a single pslldq. |
| define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_slli_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_slli_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Calls @llvm.x86.sse2.sqrt.pd on the input vector; the assertions expect a single sqrtpd %xmm0, %xmm0. |
| define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind { |
| ; X32-LABEL: test_mm_sqrt_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: sqrtpd %xmm0, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sqrt_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: sqrtpd %xmm0, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) |
| ret <2 x double> %res |
| } |
| declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.sqrt.sd on %a0, then builds the result from lane 0 of that call and lane 1 of %a1. |
| ; The assertions expect sqrtsd %xmm0, %xmm1 (writing into the register that holds %a1) followed by a movapd copy into %xmm0. |
| define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_sqrt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: sqrtsd %xmm0, %xmm1 |
| ; X32-NEXT: movapd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sqrt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: sqrtsd %xmm0, %xmm1 |
| ; X64-NEXT: movapd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) |
| %ext0 = extractelement <2 x double> %call, i32 0 |
| %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 |
| %ext1 = extractelement <2 x double> %a1, i32 1 |
| %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 |
| ret <2 x double> %ins1 |
| } |
| declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psra.w on both operands viewed as <8 x i16>; the assertions expect the register-count form psraw %xmm1, %xmm0. |
| define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_sra_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psraw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sra_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psraw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psra.d on both operands viewed as <4 x i32>; the assertions expect the register-count form psrad %xmm1, %xmm0. |
| define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_sra_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrad %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sra_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrad %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psrai.w with a constant count of 1; the assertions expect the immediate form psraw $1, %xmm0. |
| define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_srai_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psraw $1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srai_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psraw $1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psrai.d with a constant count of 1; the assertions expect the immediate form psrad $1, %xmm0. |
| define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_srai_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrad $1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srai_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrad $1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psrl.w on both operands viewed as <8 x i16>; the assertions expect the register-count form psrlw %xmm1, %xmm0. |
| define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_srl_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrlw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srl_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrlw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psrl.d on both operands viewed as <4 x i32>; the assertions expect the register-count form psrld %xmm1, %xmm0. |
| define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_srl_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrld %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srl_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrld %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psrl.q directly on the <2 x i64> operands (no bitcasts needed); the assertions expect psrlq %xmm1, %xmm0. |
| define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_srl_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrlq %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srl_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrlq %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) |
| ret <2 x i64> %res |
| } |
| declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psrli.w with a constant count of 1; the assertions expect the immediate form psrlw $1, %xmm0. |
| define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_srli_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrlw $1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srli_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrlw $1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psrli.d with a constant count of 1; the assertions expect the immediate form psrld $1, %xmm0. |
| define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_srli_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrld $1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srli_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrld $1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1) |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psrli.q with a constant count of 1; the assertions expect the immediate form psrlq $1, %xmm0. |
| define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) { |
| ; X32-LABEL: test_mm_srli_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrlq $1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srli_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrlq $1, %xmm0 |
| ; X64-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1) |
| ret <2 x i64> %res |
| } |
| declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone |
| |
| ; Whole-register byte shift right by 5, written as a shufflevector whose second operand is the zero vector. |
| ; Mask indices 5-15 pick bytes 5-15 of the input and indices 16-20 pick five zero bytes; the assertions expect a single psrldq. |
| define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind { |
| ; X32-LABEL: test_mm_srli_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_srli_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Stores the full <2 x double> through the pointer with align 16; the assertions expect an aligned movaps store. |
| define void @test_mm_store_pd(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_store_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movaps %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_store_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movaps %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %arg0 = bitcast double* %a0 to <2 x double>* |
| store <2 x double> %a1, <2 x double>* %arg0, align 16 |
| ret void |
| } |
| |
| ; Splats lane 0 of %a1 to both lanes and stores the result with align 16. |
| ; The assertions expect movlhps xmm0[0,0] for the splat followed by an aligned movaps store. |
| define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_store_pd1: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X32-NEXT: movaps %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_store_pd1: |
| ; X64: # BB#0: |
| ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X64-NEXT: movaps %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %arg0 = bitcast double * %a0 to <2 x double>* |
| %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer |
| store <2 x double> %shuf, <2 x double>* %arg0, align 16 |
| ret void |
| } |
| |
| ; Extracts lane 0 of %a1 and stores it as a scalar double with align 1; the assertions expect a movsd store. |
| define void @test_mm_store_sd(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_store_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movsd %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_store_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movsd %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %ext = extractelement <2 x double> %a1, i32 0 |
| store double %ext, double* %a0, align 1 |
| ret void |
| } |
| |
| ; Stores the full <2 x i64> through the pointer with align 16; the assertions expect an aligned movaps store. |
| define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_store_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movaps %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_store_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: movaps %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| store <2 x i64> %a1, <2 x i64>* %a0, align 16 |
| ret void |
| } |
| |
| ; Same IR body as test_mm_store_pd1: splats lane 0 of %a1 to both lanes and stores the result with align 16. |
| ; The assertions expect movlhps xmm0[0,0] followed by an aligned movaps store. |
| define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_store1_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X32-NEXT: movaps %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_store1_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] |
| ; X64-NEXT: movaps %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %arg0 = bitcast double * %a0 to <2 x double>* |
| %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer |
| store <2 x double> %shuf, <2 x double>* %arg0, align 16 |
| ret void |
| } |
| |
| ; Extracts lane 1 (the high double) of %a1 and stores it with align 8. |
| ; The assertions expect movhlps xmm0[1,1] to bring the high lane down, then a movsd store. |
| define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_storeh_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] |
| ; X32-NEXT: movsd %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_storeh_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] |
| ; X64-NEXT: movsd %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %ext = extractelement <2 x double> %a1, i32 1 |
| store double %ext, double* %a0, align 8 |
| ret void |
| } |
| |
| ; Extracts lane 0 of the <2 x i64> and stores it as a scalar i64 with align 8. |
| ; The assertions expect a movlps store on i386, and a movq through %rax followed by a movq store on x86_64. |
| define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_storel_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movlps %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_storel_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: movq %xmm0, %rax |
| ; X64-NEXT: movq %rax, (%rdi) |
| ; X64-NEXT: retq |
| %ext = extractelement <2 x i64> %a1, i32 0 |
| %bc = bitcast <2 x i64> *%a0 to i64* |
| store i64 %ext, i64* %bc, align 8 |
| ret void |
| } |
| |
| ; Extracts lane 0 (the low double) of %a1 and stores it with align 8; the assertions expect a movsd store. |
| define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_storel_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movsd %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_storel_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movsd %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %ext = extractelement <2 x double> %a1, i32 0 |
| store double %ext, double* %a0, align 8 |
| ret void |
| } |
| |
| ; Swaps the two double lanes of %a1 (shuffle mask <1, 0>) and stores the result with align 16. |
| ; The assertions expect shufpd xmm0[1,0] followed by an aligned movapd store. |
| define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_storer_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] |
| ; X32-NEXT: movapd %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_storer_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] |
| ; X64-NEXT: movapd %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %arg0 = bitcast double* %a0 to <2 x double>* |
| %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0> |
| store <2 x double> %shuf, <2 x double>* %arg0, align 16 |
| ret void |
| } |
| |
| ; Stores the full <2 x double> through the pointer with align 1; the assertions expect an unaligned movups store. |
| define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_storeu_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movups %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_storeu_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movups %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %arg0 = bitcast double* %a0 to <2 x double>* |
| store <2 x double> %a1, <2 x double>* %arg0, align 1 |
| ret void |
| } |
| |
| ; Stores the full <2 x i64> through the pointer with align 1; the assertions expect an unaligned movups store. |
| define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_storeu_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movups %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_storeu_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: movups %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| store <2 x i64> %a1, <2 x i64>* %a0, align 1 |
| ret void |
| } |
| |
| ; Stores the full <2 x double> with align 16 and !nontemporal metadata; the assertions expect a movntps store. |
| define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_stream_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movntps %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_stream_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: movntps %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| %arg0 = bitcast double* %a0 to <2 x double>* |
| store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0 |
| ret void |
| } |
| |
| ; Stores a scalar i32 with align 1 and !nontemporal metadata; the assertions expect a movntil store. |
| define void @test_mm_stream_si32(i32 *%a0, i32 %a1) { |
| ; X32-LABEL: test_mm_stream_si32: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movntil %eax, (%ecx) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_stream_si32: |
| ; X64: # BB#0: |
| ; X64-NEXT: movntil %esi, (%rdi) |
| ; X64-NEXT: retq |
| store i32 %a1, i32* %a0, align 1, !nontemporal !0 |
| ret void |
| } |
| |
| ; Stores the full <2 x i64> with align 16 and !nontemporal metadata; the assertions expect a movntps store. |
| define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_stream_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movntps %xmm0, (%eax) |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_stream_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: movntps %xmm0, (%rdi) |
| ; X64-NEXT: retq |
| store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0 |
| ret void |
| } |
| |
| ; Plain IR sub on the operands viewed as <16 x i8>; the assertions expect a single psubb. |
| define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_sub_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: psubb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sub_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: psubb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %res = sub <16 x i8> %arg0, %arg1 |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Plain IR sub on the operands viewed as <8 x i16>; the assertions expect a single psubw. |
| define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_sub_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psubw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sub_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psubw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = sub <8 x i16> %arg0, %arg1 |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Plain IR sub on the operands viewed as <4 x i32>; the assertions expect a single psubd. |
| define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_sub_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: psubd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sub_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: psubd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <4 x i32> |
| %arg1 = bitcast <2 x i64> %a1 to <4 x i32> |
| %res = sub <4 x i32> %arg0, %arg1 |
| %bc = bitcast <4 x i32> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| |
| ; Plain IR sub directly on the <2 x i64> operands; the assertions expect a single psubq. |
| define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_sub_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: psubq %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sub_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: psubq %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = sub <2 x i64> %a0, %a1 |
| ret <2 x i64> %res |
| } |
| |
| ; Plain IR fsub on the <2 x double> operands; the assertions expect a single subpd. |
| define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_sub_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: subpd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sub_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: subpd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %res = fsub <2 x double> %a0, %a1 |
| ret <2 x double> %res |
| } |
| |
| ; Scalar subtract of lane 0 only: extracts lane 0 of each operand, fsubs them, and inserts the result back into lane 0 of %a0 (lane 1 of %a0 is kept). |
| ; The assertions expect the extract/fsub/insert sequence to select a single subsd. |
| define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_sub_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: subsd %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_sub_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: subsd %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %ext0 = extractelement <2 x double> %a0, i32 0 |
| %ext1 = extractelement <2 x double> %a1, i32 0 |
| %fsub = fsub double %ext0, %ext1 |
| %res = insertelement <2 x double> %a0, double %fsub, i32 0 |
| ret <2 x double> %res |
| } |
| |
| ; Calls @llvm.x86.sse2.psubs.b on both operands viewed as <16 x i8>; the assertions expect a single psubsb. |
| define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_subs_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: psubsb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_subs_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: psubsb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1) |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psubs.w on both operands viewed as <8 x i16>; the assertions expect a single psubsw. |
| define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_subs_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psubsw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_subs_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psubsw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psubus.b on both operands viewed as <16 x i8>; the assertions expect a single psubusb. |
| define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_subs_epu8: |
| ; X32: # BB#0: |
| ; X32-NEXT: psubusb %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_subs_epu8: |
| ; X64: # BB#0: |
| ; X64-NEXT: psubusb %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <16 x i8> |
| %arg1 = bitcast <2 x i64> %a1 to <16 x i8> |
| %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1) |
| %bc = bitcast <16 x i8> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; Calls @llvm.x86.sse2.psubus.w on both operands viewed as <8 x i16>; the assertions expect a single psubusw. |
| define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_subs_epu16: |
| ; X32: # BB#0: |
| ; X32-NEXT: psubusw %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_subs_epu16: |
| ; X64: # BB#0: |
| ; X64-NEXT: psubusw %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %arg0 = bitcast <2 x i64> %a0 to <8 x i16> |
| %arg1 = bitcast <2 x i64> %a1 to <8 x i16> |
| %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1) |
| %bc = bitcast <8 x i16> %res to <2 x i64> |
| ret <2 x i64> %bc |
| } |
| declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; Returns the i32 result of the ucomieq.sd intrinsic. Expected code: ucomisd, then setnp and sete combined with andb and zero-extended into eax. |
| define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_ucomieq_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: ucomisd %xmm1, %xmm0 |
| ; X32-NEXT: setnp %al |
| ; X32-NEXT: sete %cl |
| ; X32-NEXT: andb %al, %cl |
| ; X32-NEXT: movzbl %cl, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_ucomieq_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: ucomisd %xmm1, %xmm0 |
| ; X64-NEXT: setnp %al |
| ; X64-NEXT: sete %cl |
| ; X64-NEXT: andb %al, %cl |
| ; X64-NEXT: movzbl %cl, %eax |
| ; X64-NEXT: retq |
| %cmp = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %cmp |
| } |
| declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Returns the i32 result of the ucomige.sd intrinsic. Expected code: xorl to clear eax, ucomisd with xmm1 as the first operand, then setae. |
| define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_ucomige_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: ucomisd %xmm1, %xmm0 |
| ; X32-NEXT: setae %al |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_ucomige_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: ucomisd %xmm1, %xmm0 |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: retq |
| %cmp = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %cmp |
| } |
| declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Returns the i32 result of the ucomigt.sd intrinsic. Expected code: xorl to clear eax, ucomisd with xmm1 as the first operand, then seta. |
| define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_ucomigt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: ucomisd %xmm1, %xmm0 |
| ; X32-NEXT: seta %al |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_ucomigt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: ucomisd %xmm1, %xmm0 |
| ; X64-NEXT: seta %al |
| ; X64-NEXT: retq |
| %cmp = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %cmp |
| } |
| declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Returns the i32 result of the ucomile.sd intrinsic. Expected code: xorl to clear eax, ucomisd with xmm0 as the first operand, then setae. |
| define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_ucomile_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: ucomisd %xmm0, %xmm1 |
| ; X32-NEXT: setae %al |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_ucomile_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: ucomisd %xmm0, %xmm1 |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: retq |
| %cmp = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %cmp |
| } |
| declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Returns the i32 result of the ucomilt.sd intrinsic. Expected code: xorl to clear eax, ucomisd with xmm0 as the first operand, then seta. |
| define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_ucomilt_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: ucomisd %xmm0, %xmm1 |
| ; X32-NEXT: seta %al |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_ucomilt_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: ucomisd %xmm0, %xmm1 |
| ; X64-NEXT: seta %al |
| ; X64-NEXT: retq |
| %cmp = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %cmp |
| } |
| declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Returns the i32 result of the ucomineq.sd intrinsic. Expected code: ucomisd, then setp and setne combined with orb and zero-extended into eax. |
| define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_ucomineq_sd: |
| ; X32: # BB#0: |
| ; X32-NEXT: ucomisd %xmm1, %xmm0 |
| ; X32-NEXT: setp %al |
| ; X32-NEXT: setne %cl |
| ; X32-NEXT: orb %al, %cl |
| ; X32-NEXT: movzbl %cl, %eax |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_ucomineq_sd: |
| ; X64: # BB#0: |
| ; X64-NEXT: ucomisd %xmm1, %xmm0 |
| ; X64-NEXT: setp %al |
| ; X64-NEXT: setne %cl |
| ; X64-NEXT: orb %al, %cl |
| ; X64-NEXT: movzbl %cl, %eax |
| ; X64-NEXT: retq |
| %cmp = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) |
| ret i32 %cmp |
| } |
| declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone |
| |
| ; Returns an undef <2 x double>; the expected output on both targets is only the return instruction. |
| define <2 x double> @test_mm_undefined_pd() { |
| ; X32-LABEL: test_mm_undefined_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_undefined_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| ret <2 x double> undef |
| } |
| |
| ; Returns an undef <2 x i64>; the expected output on both targets is only the return instruction. |
| define <2 x i64> @test_mm_undefined_si128() { |
| ; X32-LABEL: test_mm_undefined_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_undefined_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: retq |
| ret <2 x i64> undef |
| } |
| |
| ; Interleaves elements 8..15 of the two operands (viewed as <16 x i8>) with a shufflevector; each target is expected to emit one punpckhbw. |
| define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_unpackhi_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpackhi_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] |
| ; X64-NEXT: retq |
| %lhs = bitcast <2 x i64> %a0 to <16 x i8> |
| %rhs = bitcast <2 x i64> %a1 to <16 x i8> |
| %mix = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> |
| %cast = bitcast <16 x i8> %mix to <2 x i64> |
| ret <2 x i64> %cast |
| } |
| |
| ; Interleaves elements 4..7 of the two operands (viewed as <8 x i16>) with a shufflevector; each target is expected to emit one punpckhwd. |
| define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_unpackhi_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpackhi_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
| ; X64-NEXT: retq |
| %lhs = bitcast <2 x i64> %a0 to <8 x i16> |
| %rhs = bitcast <2 x i64> %a1 to <8 x i16> |
| %mix = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| %cast = bitcast <8 x i16> %mix to <2 x i64> |
| ret <2 x i64> %cast |
| } |
| |
| ; Interleaves elements 2..3 of the two operands (viewed as <4 x i32>) with a shufflevector; each target is expected to emit one punpckhdq. |
| define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_unpackhi_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpackhi_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] |
| ; X64-NEXT: retq |
| %lhs = bitcast <2 x i64> %a0 to <4 x i32> |
| %rhs = bitcast <2 x i64> %a1 to <4 x i32> |
| %mix = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
| %cast = bitcast <4 x i32> %mix to <2 x i64> |
| ret <2 x i64> %cast |
| } |
| |
| ; Selects element 1 of each <2 x i64> operand with a shufflevector; each target is expected to emit one punpckhqdq. |
| define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_unpackhi_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpackhi_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] |
| ; X64-NEXT: retq |
| %mix = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3> |
| ret <2 x i64> %mix |
| } |
| |
| ; Selects element 1 of each <2 x double> operand with a shufflevector; each target is expected to emit one unpckhpd. |
| define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_unpackhi_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpackhi_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] |
| ; X64-NEXT: retq |
| %mix = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> |
| ret <2 x double> %mix |
| } |
| |
| ; Interleaves elements 0..7 of the two operands (viewed as <16 x i8>) with a shufflevector; each target is expected to emit one punpcklbw. |
| define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_unpacklo_epi8: |
| ; X32: # BB#0: |
| ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpacklo_epi8: |
| ; X64: # BB#0: |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
| ; X64-NEXT: retq |
| %lhs = bitcast <2 x i64> %a0 to <16 x i8> |
| %rhs = bitcast <2 x i64> %a1 to <16 x i8> |
| %mix = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> |
| %cast = bitcast <16 x i8> %mix to <2 x i64> |
| ret <2 x i64> %cast |
| } |
| |
| ; Interleaves elements 0..3 of the two operands (viewed as <8 x i16>) with a shufflevector; each target is expected to emit one punpcklwd. |
| define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_unpacklo_epi16: |
| ; X32: # BB#0: |
| ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpacklo_epi16: |
| ; X64: # BB#0: |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] |
| ; X64-NEXT: retq |
| %lhs = bitcast <2 x i64> %a0 to <8 x i16> |
| %rhs = bitcast <2 x i64> %a1 to <8 x i16> |
| %mix = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| %cast = bitcast <8 x i16> %mix to <2 x i64> |
| ret <2 x i64> %cast |
| } |
| |
| ; Interleaves elements 0..1 of the two operands (viewed as <4 x i32>) with a shufflevector; each target is expected to emit one punpckldq. |
| define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_unpacklo_epi32: |
| ; X32: # BB#0: |
| ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpacklo_epi32: |
| ; X64: # BB#0: |
| ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; X64-NEXT: retq |
| %lhs = bitcast <2 x i64> %a0 to <4 x i32> |
| %rhs = bitcast <2 x i64> %a1 to <4 x i32> |
| %mix = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
| %cast = bitcast <4 x i32> %mix to <2 x i64> |
| ret <2 x i64> %cast |
| } |
| |
| ; Selects element 0 of each <2 x i64> operand with a shufflevector; each target is expected to emit one punpcklqdq. |
| define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) { |
| ; X32-LABEL: test_mm_unpacklo_epi64: |
| ; X32: # BB#0: |
| ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpacklo_epi64: |
| ; X64: # BB#0: |
| ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| %mix = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2> |
| ret <2 x i64> %mix |
| } |
| |
| ; Selects element 0 of each <2 x double> operand with a shufflevector; each target is expected to emit one unpcklpd. |
| define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) { |
| ; X32-LABEL: test_mm_unpacklo_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_unpacklo_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
| ; X64-NEXT: retq |
| %mix = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> |
| ret <2 x double> %mix |
| } |
| |
| ; XORs the two <2 x double> operands through a <4 x i32> view and casts back; each target is expected to emit one xorps. |
| define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind { |
| ; X32-LABEL: test_mm_xor_pd: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorps %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_xor_pd: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %lhs = bitcast <2 x double> %a0 to <4 x i32> |
| %rhs = bitcast <2 x double> %a1 to <4 x i32> |
| %bits = xor <4 x i32> %lhs, %rhs |
| %cast = bitcast <4 x i32> %bits to <2 x double> |
| ret <2 x double> %cast |
| } |
| |
| ; XORs the two <2 x i64> operands directly; each target is expected to emit one xorps. |
| define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { |
| ; X32-LABEL: test_mm_xor_si128: |
| ; X32: # BB#0: |
| ; X32-NEXT: xorps %xmm1, %xmm0 |
| ; X32-NEXT: retl |
| ; |
| ; X64-LABEL: test_mm_xor_si128: |
| ; X64: # BB#0: |
| ; X64-NEXT: xorps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %bits = xor <2 x i64> %a0, %a1 |
| ret <2 x i64> %bits |
| } |
| |
| ; Metadata node holding a single i32 1. Its users are outside this part of the file (presumably !nontemporal annotations on stores; confirm before changing). |
| !0 = !{i32 1} |
| |