|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s | 
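|  |  | 
|  | ; Stack reload folding tests. | 
|  | ; | 
|  | ; By including a nop call with sideeffects we can force a register spill of the | 
|  | ; relevant MMX/XMM registers and check that the reload is correctly folded into the instruction. | 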
|  |  | 
|  | define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) { | 
|  | ; CHECK-LABEL: stack_fold_cvtpd2pi: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    cvtpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() | 
|  | %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone | 
|  |  | 
|  | define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) { | 
|  | ; CHECK-LABEL: stack_fold_cvtpi2pd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    cvtpi2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone | 
|  | ret <2 x double> %2 | 
|  | } | 
|  | declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone | 
|  |  | 
|  | define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) { | 
|  | ; CHECK-LABEL: stack_fold_cvtpi2ps: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    cvtpi2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone | 
|  | ret <4 x float> %2 | 
|  | } | 
|  | declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) { | 
|  | ; CHECK-LABEL: stack_fold_cvtps2pi: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    cvtps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() | 
|  | %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) { | 
|  | ; CHECK-LABEL: stack_fold_cvttpd2pi: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    cvttpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() | 
|  | %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) { | 
|  | ; CHECK-LABEL: stack_fold_cvttps2pi: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    cvttps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() | 
|  | %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone | 
|  |  | 
|  | ; TODO stack_fold_movd_load | 
|  |  | 
|  | ; padd forces the computation onto an mmx register | 
|  | define i32 @stack_fold_movd_store(x86_mmx %a0) nounwind { | 
|  | ; CHECK-LABEL: stack_fold_movd_store: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    pushq %rbp | 
|  | ; CHECK-NEXT:    pushq %r15 | 
|  | ; CHECK-NEXT:    pushq %r14 | 
|  | ; CHECK-NEXT:    pushq %r13 | 
|  | ; CHECK-NEXT:    pushq %r12 | 
|  | ; CHECK-NEXT:    pushq %rbx | 
|  | ; CHECK-NEXT:    paddb %mm0, %mm0 | 
|  | ; CHECK-NEXT:    movd %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; CHECK-NEXT:    popq %rbx | 
|  | ; CHECK-NEXT:    popq %r12 | 
|  | ; CHECK-NEXT:    popq %r13 | 
|  | ; CHECK-NEXT:    popq %r14 | 
|  | ; CHECK-NEXT:    popq %r15 | 
|  | ; CHECK-NEXT:    popq %rbp | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0) | 
|  | %2 = bitcast x86_mmx %1 to <2 x i32> | 
|  | %3 = extractelement <2 x i32> %2, i32 0 | 
|  | %4 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() | 
|  | ret i32 %3 | 
|  | } | 
|  |  | 
|  | ; TODO stack_fold_movq_load | 
|  |  | 
|  | ; padd forces the computation onto an mmx register | 
|  | define i64 @stack_fold_movq_store(x86_mmx %a0) nounwind { | 
|  | ; CHECK-LABEL: stack_fold_movq_store: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    pushq %rbp | 
|  | ; CHECK-NEXT:    pushq %r15 | 
|  | ; CHECK-NEXT:    pushq %r14 | 
|  | ; CHECK-NEXT:    pushq %r13 | 
|  | ; CHECK-NEXT:    pushq %r12 | 
|  | ; CHECK-NEXT:    pushq %rbx | 
|  | ; CHECK-NEXT:    paddb %mm0, %mm0 | 
|  | ; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload | 
|  | ; CHECK-NEXT:    popq %rbx | 
|  | ; CHECK-NEXT:    popq %r12 | 
|  | ; CHECK-NEXT:    popq %r13 | 
|  | ; CHECK-NEXT:    popq %r14 | 
|  | ; CHECK-NEXT:    popq %r15 | 
|  | ; CHECK-NEXT:    popq %rbp | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0) | 
|  | %2 = bitcast x86_mmx %1 to i64 | 
|  | %3 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() | 
|  | ret i64 %2 | 
|  | } | 
|  |  | 
|  | define x86_mmx @stack_fold_pabsb(x86_mmx %a0) { | 
|  | ; CHECK-LABEL: stack_fold_pabsb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pabsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %a0) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pabsd(x86_mmx %a0) { | 
|  | ; CHECK-LABEL: stack_fold_pabsd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pabsd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %a0) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pabsw(x86_mmx %a0) { | 
|  | ; CHECK-LABEL: stack_fold_pabsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pabsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %a0) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_packssdw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    packssdw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_packsswb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    packsswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_packuswb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    packuswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_paddb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    paddb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_paddd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    paddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_paddq: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    paddq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_paddsb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    paddsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_paddsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    paddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_paddusb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    paddusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_paddusw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    paddusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_paddw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    paddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_palignr: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    palignr $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a, x86_mmx %b, i8 1) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pand: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pand {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pandn: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pandn {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pavgb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pavgb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pavgw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pavgw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pcmpeqb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pcmpeqb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pcmpeqd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pcmpeqw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pcmpeqw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pcmpgtb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pcmpgtb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pcmpgtd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pcmpgtd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pcmpgtw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pcmpgtw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_phaddd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_phaddd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    phaddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_phaddsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    phaddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_phaddw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    phaddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_phsubd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    phsubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_phsubsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    phsubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_phsubw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    phsubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | ; TODO stack_fold_pinsrw | 
|  |  | 
|  | define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmaddubsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmaddubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmaddwd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmaddwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmaxsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmaxsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmaxub: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmaxub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pminsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pminsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pminub: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pminub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmulhrsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmulhrsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmulhuw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmulhuw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmulhw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmulhw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmullw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmullw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pmuludq: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_por: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    por {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psadbw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psadbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pshufb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Reload | 
|  | ; CHECK-NEXT:    pshufb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pshufw(x86_mmx %a) { | 
|  | ; CHECK-LABEL: stack_fold_pshufw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pshufw $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    # mm0 = mem[1,0,0,0] | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) { | 
|  | ; CHECK-LABEL: stack_fold_psignb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psignb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) { | 
|  | ; CHECK-LABEL: stack_fold_psignd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psignd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) { | 
|  | ; CHECK-LABEL: stack_fold_psignw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psignw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pslld: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pslld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psllq: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psllq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psllw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psllw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psrad: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psrad {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psraw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psraw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psrld: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psrld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psrlq: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psrlq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psrlw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psrlw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psubb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psubb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psubd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psubq: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psubq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psubsb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psubsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psubsw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psubusb: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psubusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psubusw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psubusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_psubw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    psubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_punpckhbw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    punpckhbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_punpckhdq: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    punpckhdq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    # mm0 = mm0[1],mem[1] | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_punpckhwd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    punpckhwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    # mm0 = mm0[2],mem[2],mm0[3],mem[3] | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_punpcklbw: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    punpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_punpckldq: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    # mm0 = mm0[0],mem[0] | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_punpcklwd: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    # mm0 = mm0[0],mem[0],mm0[1],mem[1] | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone | 
|  |  | 
|  | define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) { | 
|  | ; CHECK-LABEL: stack_fold_pxor: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill | 
|  | ; CHECK-NEXT:    #APP | 
|  | ; CHECK-NEXT:    nop | 
|  | ; CHECK-NEXT:    #NO_APP | 
|  | ; CHECK-NEXT:    pxor {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    movq2dq %mm0, %xmm0 | 
|  | ; CHECK-NEXT:    retq | 
|  | %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() | 
|  | %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone | 
|  | ret x86_mmx %2 | 
|  | } | 
|  | declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone |