; llvm/test/CodeGen/X86/pr134602.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=X64

 ; Test for incorrect vector codegen due to bad handling of splats of binops containing undefs
 ; Only lane 0 of the <4 x i16> vector built below depends on %a0; lanes 1-3
 ; are constants. The function therefore returns (%a0 | 1) + 3, evaluated in
 ; i16 and then sign-extended to i32.
 define i32 @PR134602(i16 %a0) {
 ; X86-LABEL: PR134602:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl $1, %eax
 ; X86-NEXT:    addl $3, %eax
 ; X86-NEXT:    cwtl
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR134602:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
 ; X64-NEXT:    paddw %xmm0, %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    psrld $16, %xmm0
 ; X64-NEXT:    paddw %xmm1, %xmm0
 ; X64-NEXT:    movd %xmm0, %eax
 ; X64-NEXT:    cwtl
 ; X64-NEXT:    retq
   ; Lane 0 = %a0, lanes 1-3 = 0. Despite the value name, this is an insert
   ; into a zero vector, not a broadcast of %a0 to every lane.
   %splat= insertelement <4 x i16> zeroinitializer, i16 %a0, i64 0
   ; Multiply by <1, 1, 0, 0>: lane 0 stays %a0; lanes 1-3 stay 0 because at
   ; least one operand is zero in each of them.
   %mul = mul <4 x i16> %splat, <i16 1, i16 1, i16 0, i16 0>
   ; OR every lane with 1: lane 0 = %a0 | 1, lanes 1-3 = 1.
   %or = or <4 x i16> splat (i16 1), %mul
   ; Horizontal add of the four lanes: (%a0 | 1) + 1 + 1 + 1. The i686
   ; expectations above (orl $1 / addl $3) are exactly this scalar form.
   %reduce = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %or)
   ; Widen the i16 sum to the i32 return type with sign extension (the cwtl
   ; in both sets of expectations).
   %ret_32 = sext i16 %reduce to i32
   ret i32 %ret_32
 }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X86
	; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=X64

	; Test for incorrect vector codegen due to bad handling of splats of binops containing undefs
	; NOTE(review): this definition repeats the @PR134602 definition that appears
	; earlier in this file, with different whitespace only. It looks like an
	; extraction artifact - a single module cannot define the same function
	; twice - TODO confirm and keep only one copy.
	;
	; Only lane 0 of the <4 x i16> vector built below depends on %a0; lanes 1-3
	; are constants. The function therefore returns (%a0 | 1) + 3, evaluated in
	; i16 and then sign-extended to i32.
	define i32 @PR134602(i16 %a0) {
	; X86-LABEL: PR134602:
	; X86: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: orl $1, %eax
	; X86-NEXT: addl $3, %eax
	; X86-NEXT: cwtl
	; X86-NEXT: retl
	;
	; X64-LABEL: PR134602:
	; X64: # %bb.0:
	; X64-NEXT: movzwl %di, %eax
	; X64-NEXT: movd %eax, %xmm0
	; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
	; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
	; X64-NEXT: paddw %xmm0, %xmm1
	; X64-NEXT: movdqa %xmm1, %xmm0
	; X64-NEXT: psrld $16, %xmm0
	; X64-NEXT: paddw %xmm1, %xmm0
	; X64-NEXT: movd %xmm0, %eax
	; X64-NEXT: cwtl
	; X64-NEXT: retq
	; Lane 0 = %a0, lanes 1-3 = 0. Despite the value name, this is an insert
	; into a zero vector, not a broadcast of %a0 to every lane.
	%splat= insertelement <4 x i16> zeroinitializer, i16 %a0, i64 0
	; Multiply by <1, 1, 0, 0>: lane 0 stays %a0; lanes 1-3 stay 0 because at
	; least one operand is zero in each of them.
	%mul = mul <4 x i16> %splat, <i16 1, i16 1, i16 0, i16 0>
	; OR every lane with 1: lane 0 = %a0 | 1, lanes 1-3 = 1.
	%or = or <4 x i16> splat (i16 1), %mul
	; Horizontal add of the four lanes: (%a0 | 1) + 1 + 1 + 1. The i686
	; expectations above (orl $1 / addl $3) are exactly this scalar form.
	%reduce = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %or)
	; Widen the i16 sum to the i32 return type with sign extension (the cwtl
	; in both sets of expectations).
	%ret_32 = sext i16 %reduce to i32
	ret i32 %ret_32
	}