|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | 
|  | ; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X86 | 
|  | ; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X86 | 
|  | ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X64 | 
|  | ; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X64 | 
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phaddw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    phaddw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Round-trip both <1 x i64> args through <4 x i16> and x86_mmx bitcasts, call
; the SSSE3 phadd.w MMX intrinsic, then bitcast back and return the low i64.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%2 = bitcast <4 x i16> %1 to x86_mmx
%3 = bitcast <4 x i16> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <4 x i16>
%6 = bitcast <4 x i16> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test88:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpgtd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test88:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pcmpgtd %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <2 x i32> to x86_mmx, call the MMX pcmpgt.d
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test87:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpgtw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test87:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pcmpgtw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <4 x i16> to x86_mmx, call the MMX pcmpgt.w
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test86:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpgtb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test86:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pcmpgtb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <8 x i8> to x86_mmx, call the MMX pcmpgt.b
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test85:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpeqd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test85:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pcmpeqd %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <2 x i32> to x86_mmx, call the MMX pcmpeq.d
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test84:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpeqw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test84:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pcmpeqw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <4 x i16> to x86_mmx, call the MMX pcmpeq.w
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test83:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpeqb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test83:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pcmpeqb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <8 x i8> to x86_mmx, call the MMX pcmpeq.b
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test82:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpckldq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test82:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <2 x i32> to x86_mmx, call the MMX punpckldq
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test81:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test81:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <4 x i16> to x86_mmx, call the MMX punpcklwd
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test80:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test80:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <8 x i8> to x86_mmx, call the MMX punpcklbw
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test79:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpckhdq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[1],mem[1]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test79:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1]
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <2 x i32> to x86_mmx, call the MMX punpckhdq
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test78:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpckhwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test78:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3]
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <4 x i16> to x86_mmx, call the MMX punpckhwd
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test77:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpckhbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test77:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7]
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <8 x i8> to x86_mmx, call the MMX punpckhbw
; intrinsic, then bitcast the result back and return it as i64.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test76:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    packuswb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test76:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    packuswb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <4 x i16> to x86_mmx and call the MMX packuswb
; intrinsic; the result is reinterpreted as <8 x i8> before returning as i64.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test75:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    packssdw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test75:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    packssdw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <2 x i32> to x86_mmx and call the MMX packssdw
; intrinsic; the result is reinterpreted as <4 x i16> before returning as i64.
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test74:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    packsswb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test74:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    packsswb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Bitcast both args through <4 x i16> to x86_mmx and call the MMX packsswb
; intrinsic; the result is reinterpreted as <8 x i8> before returning as i64.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone | 
|  |  | 
define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test73:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrad $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test73:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psrad $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Single-operand variant: bitcast the arg through <2 x i32> to x86_mmx, call
; the MMX psrai.d intrinsic with immediate count 3, and return the result as i64.
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
%2 = bitcast x86_mmx %1 to <2 x i32>
%3 = bitcast <2 x i32> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psrai.w with an immediate count (3) lowers to
; "psraw $3" on both 32- and 64-bit targets.
define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test72:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psraw $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test72:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psraw $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
%2 = bitcast x86_mmx %1 to <4 x i16>
%3 = bitcast <4 x i16> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
; Checks that llvm.x86.mmx.psrai.w with a zero count is folded away: the
; expected output contains no psraw at all (x86-64 reduces to movq %rdi, %rax).
define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test72_2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test72_2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind
%2 = bitcast x86_mmx %1 to <4 x i16>
%3 = bitcast <4 x i16> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psrli.q with an immediate count (3) lowers to
; "psrlq $3". The i64 operand comes from extractelement (not a vector bitcast),
; so the x86 path loads it straight from the caller's stack slot with movq.
define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test71:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psrlq $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test71:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psrlq $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
%2 = bitcast x86_mmx %1 to i64
ret i64 %2
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psrli.d with an immediate count (3) lowers to
; "psrld $3" on both targets.
define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test70:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrld $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test70:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psrld $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
%2 = bitcast x86_mmx %1 to <2 x i32>
%3 = bitcast <2 x i32> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
; Checks that llvm.x86.mmx.psrli.d with a zero count is folded away: no psrld
; appears in the expected output (x86-64 reduces to movq %rdi, %rax).
define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test70_2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test70_2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind
%2 = bitcast x86_mmx %1 to <2 x i32>
%3 = bitcast <2 x i32> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psrli.w with an immediate count (3) lowers to
; "psrlw $3" on both targets.
define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test69:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrlw $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test69:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psrlw $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
%2 = bitcast x86_mmx %1 to <4 x i16>
%3 = bitcast <4 x i16> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.pslli.q with an immediate count (3) lowers to
; "psllq $3". Like test71, the operand is an extracted i64, so the x86 path
; loads it directly from the incoming argument slot.
define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test68:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psllq $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test68:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psllq $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
%2 = bitcast x86_mmx %1 to i64
ret i64 %2
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.pslli.d with an immediate count (3) lowers to
; "pslld $3" on both targets.
define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test67:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pslld $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test67:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pslld $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
%2 = bitcast x86_mmx %1 to <2 x i32>
%3 = bitcast <2 x i32> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.pslli.w with an immediate count (3) lowers to
; "psllw $3" on both targets.
define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test66:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psllw $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test66:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psllw $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
%2 = bitcast x86_mmx %1 to <4 x i16>
%3 = bitcast <4 x i16> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
; Checks that llvm.x86.mmx.pslli.w with a zero count is folded away: no psllw
; appears in the expected output (x86-64 reduces to movq %rdi, %rax).
define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test66_2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test66_2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind
%2 = bitcast x86_mmx %1 to <4 x i16>
%3 = bitcast <4 x i16> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psra.d with a register shift count lowers to psrad
; with an MMX-register (x86-64) or memory (x86) count operand.
define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test65:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrad 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test65:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psrad %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1.i = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psra.w with a register shift count lowers to psraw
; with an MMX-register (x86-64) or memory (x86) count operand.
define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psraw 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psraw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1.i = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psrl.q with a register shift count lowers to psrlq.
; Both operands are extracted i64s, so the x86 path loads the value operand
; directly from its argument slot and uses a memory count operand.
define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test63:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psrlq 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test63:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psrlq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1.i = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to i64
ret i64 %3
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psrl.d with a register shift count lowers to psrld
; with an MMX-register (x86-64) or memory (x86) count operand.
define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test62:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrld 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test62:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psrld %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1.i = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psrl.w with a register shift count lowers to psrlw
; with an MMX-register (x86-64) or memory (x86) count operand.
define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test61:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrlw 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test61:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psrlw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1.i = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psll.q with a register shift count lowers to psllq.
; As in test63, the i64 value operand is loaded straight from its argument
; slot on x86.
define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test60:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psllq 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test60:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psllq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1.i = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to i64
ret i64 %3
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psll.d with a register shift count lowers to pslld
; with an MMX-register (x86-64) or memory (x86) count operand.
define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test59:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pslld 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test59:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pslld %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1.i = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.psll.w with a register shift count lowers to psllw
; with an MMX-register (x86-64) or memory (x86) count operand.
define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test58:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psllw 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test58:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psllw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1.i = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.pxor lowers to pxor. Since pxor is commutative, the
; x86-64 codegen is free to swap the operands (pxor %mm0, %mm1).
define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test56:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test56:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pxor %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.por lowers to por. Since por is commutative, the
; x86-64 codegen is free to swap the operands (por %mm0, %mm1).
define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test55:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    por {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test55:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    por %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks that llvm.x86.mmx.pandn lowers to pandn. Unlike pxor/por/pand, pandn
; is NOT commutative (it complements its destination operand), so the x86-64
; output keeps %a in %mm0 as the destination: pandn %mm1, %mm0.
define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test54:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pandn {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test54:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pandn %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.pand lowers to a single pand (commutative, so x64 may
; swap operands); <1 x i64> args are bitcast through <2 x i32> to x86_mmx.
define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test53:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pand {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test53:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pand %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.pmull.w lowers to a single pmullw; <1 x i64> args are
; bitcast through <4 x i16> to x86_mmx.
define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test52:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmullw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test52:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmullw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
; Same IR pattern as test52 (llvm.x86.mmx.pmull.w -> pmullw), kept as a
; second coverage point for the <4 x i16> bitcast path.
define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test51:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmullw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test51:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmullw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.pmulh.w lowers to a single pmulhw; <1 x i64> args are
; bitcast through <4 x i16> to x86_mmx.
define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test50:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmulhw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test50:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmulhw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.pmadd.wd lowers to a single pmaddwd; inputs are bitcast
; as <4 x i16> and the result is bitcast back as <2 x i32> (widening multiply-add).
define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test49:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmaddwd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test49:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmaddwd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.psubus.w lowers to a single psubusw (non-commutative:
; x64 keeps %a in %mm0 as the destination); args bitcast through <4 x i16>.
define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test48:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubusw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test48:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubusw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.psubus.b lowers to a single psubusb (non-commutative);
; args bitcast through <8 x i8> to x86_mmx.
define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test47:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubusb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test47:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubusb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.psubs.w lowers to a single psubsw (non-commutative);
; args bitcast through <4 x i16> to x86_mmx.
define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test46:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test46:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubsw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.psubs.b lowers to a single psubsb (non-commutative);
; args bitcast through <8 x i8> to x86_mmx.
define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test45:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubsb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test45:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubsb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
; Checks llvm.x86.mmx.psub.q lowers to a single psubq. Unlike the vector
; cases, args go i64 -> x86_mmx directly, so x86 loads the operands straight
; from the incoming argument slots (smaller 8-byte frame).
define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test44:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psubq 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test44:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var = bitcast i64 %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1 = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
%3 = bitcast x86_mmx %2 to i64
ret i64 %3
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.psub.d lowers to a single psubd (non-commutative);
; args bitcast through <2 x i32> to x86_mmx.
define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test43:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test43:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubd %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.psub.w lowers to a single psubw (non-commutative);
; args bitcast through <4 x i16> to x86_mmx.
define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test42:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test42:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.psub.b lowers to a single psubb (non-commutative);
; args bitcast through <8 x i8> to x86_mmx.
define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test41:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test41:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.paddus.w lowers to a single paddusw (commutative, so
; x64 may swap operands); args bitcast through <4 x i16> to x86_mmx.
define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test40:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddusw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test40:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddusw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.paddus.b lowers to a single paddusb (commutative);
; args bitcast through <8 x i8> to x86_mmx.
define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test39:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddusb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test39:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddusb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.padds.w lowers to a single paddsw (commutative);
; args bitcast through <4 x i16> to x86_mmx.
define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test38:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test38:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.padds.b lowers to a single paddsb (commutative);
; args bitcast through <8 x i8> to x86_mmx.
define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test37:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddsb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test37:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddsb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Checks llvm.x86.mmx.padd.q lowers to a single paddq. As in test44, args go
; i64 -> x86_mmx directly, so x86 loads operands from the incoming argument
; slots with a small 8-byte frame.
define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test36:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    paddq 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test36:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddq %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var = bitcast i64 %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1 = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
%3 = bitcast x86_mmx %2 to i64
ret i64 %3
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.padd.d (packed 32-bit add) lowers to PADDD.
; The <1 x i64> args are viewed as <2 x i32> before entering MMX; on X86 they
; arrive as i32 pairs on the stack and are reassembled into 8-byte slots.
define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test35:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test35:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; Reinterpret both args as <2 x i32>, then as x86_mmx, for the intrinsic call.
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.padd.w (packed 16-bit add) lowers to PADDW.
define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test34:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test34:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <4 x i16> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.padd.b (packed 8-bit add) lowers to PADDB.
define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test33:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test33:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <8 x i8> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.psad.bw (sum of absolute differences) lowers to
; PSADBW. The intrinsic result is bitcast directly to i64 (no lane view needed).
define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psadbw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psadbw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <8 x i8> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to i64
ret i64 %3
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pmins.w (signed 16-bit min) lowers to PMINSW.
define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test31:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pminsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test31:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pminsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <4 x i16> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pminu.b (unsigned 8-bit min) lowers to PMINUB.
define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test30:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pminub {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test30:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pminub %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <8 x i8> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pmaxs.w (signed 16-bit max) lowers to PMAXSW.
define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test29:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmaxsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test29:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmaxsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <4 x i16> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pmaxu.b (unsigned 8-bit max) lowers to PMAXUB.
define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test28:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmaxub {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test28:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmaxub %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <8 x i8> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pavg.w (16-bit rounding average) lowers to PAVGW.
define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test27:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pavgw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test27:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pavgw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <4 x i16> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pavg.b (8-bit rounding average) lowers to PAVGB.
define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test26:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pavgb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test26:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pavgb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <8 x i8> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare void @llvm.x86.mmx.movnt.dq(ptr, x86_mmx) nounwind | 
|  |  | 
; Tests that @llvm.x86.mmx.movnt.dq (non-temporal 64-bit store) lowers to
; MOVNTQ through the pointer %p. Void result; pure side-effecting store.
define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp {
; X86-LABEL: test25:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movntq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: test25:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movntq %mm0, (%rdi)
; X64-NEXT:    retq
entry:
; Move the scalar payload into MMX and issue the non-temporal store.
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
tail call void @llvm.x86.mmx.movnt.dq(ptr %p, x86_mmx %mmx_var.i) nounwind
ret void
}
|  |  | 
|  | declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pmovmskb (byte sign-bit mask) lowers to PMOVMSKB,
; producing an i32 result directly in EAX.
define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test24:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    movq (%esp), %mm0
; X86-NEXT:    pmovmskb %mm0, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test24:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pmovmskb %mm0, %eax
; X64-NEXT:    retq
entry:
; View the arg as <8 x i8> lanes before entering MMX.
%0 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
%1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
ret i32 %1
}
|  |  | 
|  | declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind | 
|  |  | 
; Tests that @llvm.x86.mmx.maskmovq (byte-masked store through EDI/RDI) lowers
; to MASKMOVQ. Note the pointer arg must be placed in (E/R)DI, hence the
; extra %edi save/restore on X86 and the "movq %rdx, %rdi" on X64.
define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp {
; X86-LABEL: test23:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    movl 24(%ebp), %edi
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq (%esp), %mm1
; X86-NEXT:    maskmovq %mm0, %mm1
; X86-NEXT:    leal -4(%ebp), %esp
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test23:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    movq %rdx, %rdi
; X64-NEXT:    maskmovq %mm1, %mm0
; X64-NEXT:    retq
entry:
; %d is the data, %n the per-byte mask; both viewed as <8 x i8> for MMX.
%0 = bitcast <1 x i64> %n to <8 x i8>
%1 = bitcast <1 x i64> %d to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
%mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, ptr %p) nounwind
ret void
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pmulhu.w (unsigned 16-bit multiply high) lowers to
; PMULHUW.
define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test22:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmulhuw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test22:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmulhuw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <4 x i16> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
%mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Round-trip the result back to i64 for the return value.
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.sse.pshuf.w with immediate 3 lowers to
; "pshufw $3" (shuffle pattern [3,0,0,0]); full i64 result returned.
define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test21:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test21:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0]
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; View the arg as <4 x i16> lanes, shuffle with imm 3, return as i64.
%0 = bitcast <1 x i64> %a to <4 x i16>
%1 = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
; Variant of test21 returning only the low i32 of the PSHUFW result:
; lowering can extract it with a single MOVD instead of spilling the
; whole 64-bit MMX value.
define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test21_2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    pshufw $3, (%esp), %mm0 # mm0 = mem[3,0,0,0]
; X86-NEXT:    movd %mm0, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test21_2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0]
; X64-NEXT:    movd %mm0, %eax
; X64-NEXT:    retq
entry:
; Same shuffle as test21, but reinterpret as <2 x i32> and keep element 0.
%0 = bitcast <1 x i64> %a to <4 x i16>
%1 = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <2 x i32>
%5 = extractelement <2 x i32> %4, i32 0
ret i32 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.mmx.pmulu.dq (unsigned 32x32 -> 64 multiply) lowers to
; PMULUDQ. Result is a full 64-bit product, bitcast straight to i64.
define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test20:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmuludq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test20:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmuludq %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
; View the args as <2 x i32> lanes before entering MMX.
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
%3 = bitcast x86_mmx %2 to i64
ret i64 %3
}
|  |  | 
|  | declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.sse.cvtpi2pd (two packed i32 -> two doubles) lowers to
; CVTPI2PD; on X86 the conversion reads directly from the stack slot.
define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test19:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    cvtpi2pd (%esp), %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test19:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    cvtpi2pd %mm0, %xmm0
; X64-NEXT:    retq
entry:
; View the arg as <2 x i32> lanes before converting to doubles.
%0 = bitcast <1 x i64> %a to <2 x i32>
%1 = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
ret <2 x double> %2
}
|  |  | 
|  | declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.sse.cvttpd2pi (truncating doubles -> packed i32) lowers
; to CVTTPD2PI, with the MMX result returned as i64.
define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; X86-LABEL: test18:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    cvttpd2pi %xmm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test18:
; X64:       # %bb.0: # %entry
; X64-NEXT:    cvttpd2pi %xmm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Convert (truncating), then round-trip the MMX result back to i64.
%0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
%1 = bitcast x86_mmx %0 to <2 x i32>
%2 = bitcast <2 x i32> %1 to <1 x i64>
%3 = extractelement <1 x i64> %2, i32 0
ret i64 %3
}
|  |  | 
|  | declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone | 
|  |  | 
; Tests that @llvm.x86.sse.cvtpd2pi (rounding doubles -> packed i32) lowers to
; CVTPD2PI; same shape as test18 but using the current-rounding-mode form.
define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; X86-LABEL: test17:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    cvtpd2pi %xmm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test17:
; X64:       # %bb.0: # %entry
; X64-NEXT:    cvtpd2pi %xmm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
; Convert, then round-trip the MMX result back to i64.
%0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
%1 = bitcast x86_mmx %0 to <2 x i32>
%2 = bitcast <2 x i32> %1 to <1 x i64>
%3 = extractelement <1 x i64> %2, i32 0
ret i64 %3
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.mmx.palignr.b with immediate 16: MMX byte alignment of
; the two i64 operands. On x86 the second operand is folded from memory
; (16(%ebp)); on x86-64 both operands arrive in GPRs and are moved into MMX.
define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    palignr $16, 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    palignr $16, %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var = bitcast i64 %0 to x86_mmx
%1 = extractelement <1 x i64> %b, i32 0
%mmx_var1 = bitcast i64 %1 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
%3 = bitcast x86_mmx %2 to i64
ret i64 %3
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of the SSSE3 MMX llvm.x86.ssse3.pabs.d intrinsic (packed absolute
; value of two i32 lanes). On x86 the <1 x i64> argument is spilled to the
; stack and pabsd reads it from memory; on x86-64 it comes in via %rdi.
define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test15:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    pabsd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test15:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pabsd %mm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%1 = bitcast <2 x i32> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.pabs.w: packed absolute value over four i16
; lanes of an MMX value (word-sized sibling of test15/test13).
define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test14:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    pabsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test14:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pabsw %mm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%1 = bitcast <4 x i16> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
%3 = bitcast x86_mmx %2 to <4 x i16>
%4 = bitcast <4 x i16> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.pabs.b: packed absolute value over eight i8
; lanes of an MMX value (byte-sized sibling of test15/test14).
define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test13:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    pabsb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test13:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pabsb %mm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a to <8 x i8>
%1 = bitcast <8 x i8> %0 to x86_mmx
%2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
%3 = bitcast x86_mmx %2 to <8 x i8>
%4 = bitcast <8 x i8> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of the two-operand llvm.x86.ssse3.psign.d intrinsic (psignd on
; i32 lanes). On x86 both <1 x i64> arguments are staged through stack slots
; so psignd can use a memory operand; on x86-64 they arrive in %rdi/%rsi.
define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test12:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psignd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test12:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psignd %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%2 = bitcast <2 x i32> %1 to x86_mmx
%3 = bitcast <2 x i32> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <2 x i32>
%6 = bitcast <2 x i32> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.psign.w (psignw on i16 lanes); word-sized
; sibling of test12/test10.
define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test11:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psignw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test11:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psignw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%2 = bitcast <4 x i16> %1 to x86_mmx
%3 = bitcast <4 x i16> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <4 x i16>
%6 = bitcast <4 x i16> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.psign.b (psignb on i8 lanes); byte-sized
; sibling of test12/test11.
define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test10:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psignb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test10:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psignb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%2 = bitcast <8 x i8> %1 to x86_mmx
%3 = bitcast <8 x i8> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <8 x i8>
%6 = bitcast <8 x i8> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.pshuf.b: MMX byte shuffle of %a using the
; control bytes in %b.
define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test9:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pshufb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test9:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pshufb %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%2 = bitcast <8 x i8> %1 to x86_mmx
%3 = bitcast <8 x i8> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <8 x i8>
%6 = bitcast <8 x i8> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.pmul.hr.sw (pmulhrsw). Note the X64 checks have
; the operands swapped relative to the other tests (result in %mm1) — the
; register allocator exploits the instruction's commutativity.
define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmulhrsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmulhrsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%2 = bitcast <4 x i16> %1 to x86_mmx
%3 = bitcast <4 x i16> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <4 x i16>
%6 = bitcast <4 x i16> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.pmadd.ub.sw (pmaddubsw). The IR round-trips the
; operands through <8 x i8> bitcasts before and after the intrinsic call.
define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test7:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmaddubsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pmaddubsw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
%2 = bitcast <8 x i8> %1 to x86_mmx
%3 = bitcast <8 x i8> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <8 x i8>
%6 = bitcast <8 x i8> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.phsub.sw (phsubsw, saturating horizontal
; subtract of i16 lanes).
define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test6:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phsubsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    phsubsw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%2 = bitcast <4 x i16> %1 to x86_mmx
%3 = bitcast <4 x i16> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <4 x i16>
%6 = bitcast <4 x i16> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.phsub.d (phsubd, horizontal subtract of i32
; lanes).
define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test5:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phsubd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    phsubd %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%2 = bitcast <2 x i32> %1 to x86_mmx
%3 = bitcast <2 x i32> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <2 x i32>
%6 = bitcast <2 x i32> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.phsub.w (phsubw, horizontal subtract of i16
; lanes).
define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test4:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phsubw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    phsubw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%2 = bitcast <4 x i16> %1 to x86_mmx
%3 = bitcast <4 x i16> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <4 x i16>
%6 = bitcast <4 x i16> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.phadd.sw (phaddsw, saturating horizontal add
; of i16 lanes).
define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test3:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phaddsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    phaddsw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
%2 = bitcast <4 x i16> %1 to x86_mmx
%3 = bitcast <4 x i16> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <4 x i16>
%6 = bitcast <4 x i16> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
|  | declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
; Lowering of llvm.x86.ssse3.phadd.d (phaddd, horizontal add of i32 lanes).
define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phaddd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    phaddd %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%2 = bitcast <2 x i32> %1 to x86_mmx
%3 = bitcast <2 x i32> %0 to x86_mmx
%4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
%5 = bitcast x86_mmx %4 to <2 x i32>
%6 = bitcast <2 x i32> %5 to <1 x i64>
%7 = extractelement <1 x i64> %6, i32 0
ret i64 %7
}
|  |  | 
; llvm.x86.sse.cvtpi2ps with a raw x86_mmx argument: the checks show the MMX
; operand is passed directly in %mm0 and cvtpi2ps updates %xmm0 in place,
; identically on both 32- and 64-bit targets (ALL prefix).
define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
; ALL-LABEL: test89:
; ALL:       # %bb.0:
; ALL-NEXT:    cvtpi2ps %mm0, %xmm0
; ALL-NEXT:    ret{{[l|q]}}
%c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
ret <4 x float> %c
}
|  |  | 
|  | declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone | 
|  |  | 
; llvm.x86.mmx.emms lowers to a bare emms instruction on both targets.
define void @test90() {
; ALL-LABEL: test90:
; ALL:       # %bb.0:
; ALL-NEXT:    emms
; ALL-NEXT:    ret{{[l|q]}}
call void @llvm.x86.mmx.emms()
ret void
}
|  |  | 
|  | declare void @llvm.x86.mmx.emms() | 
|  |  | 
; Lowering of llvm.x86.mmx.pinsr.w with lane immediate 2. On x86 the i32
; operand is folded from its stack slot (16(%ebp)); on x86-64 it comes in
; via %esi. Returns the updated MMX value as <1 x i64>.
define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind {
; X86-LABEL: test_mm_insert_pi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pinsrw $2, 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_insert_pi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pinsrw $2, %esi, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a.coerce to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2)
%2 = bitcast x86_mmx %1 to <1 x i64>
ret <1 x i64> %2
}
|  |  | 
|  | declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg) | 
|  |  | 
; Lowering of llvm.x86.mmx.pextr.w with lane immediate 2: extract word 2 of
; the MMX value straight into %eax (the i32 return register) — no stack
; round-trip needed for the result on either target.
define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind {
; X86-LABEL: test_mm_extract_pi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    movq (%esp), %mm0
; X86-NEXT:    pextrw $2, %mm0, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_extract_pi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pextrw $2, %mm0, %eax
; X64-NEXT:    retq
entry:
%0 = bitcast <1 x i64> %a.coerce to x86_mmx
%1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2)
ret i32 %1
}
|  |  | 
|  | declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg) |