blob: 948c49b3dc867ed1760f9205750ba467afc60b6c [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
;; Use cttz to test if we properly prove never-zero. There is a very
;; simple transform from cttz -> cttz_zero_undef if its operand is
;; known never zero.
;; Scalar baseline: OR-ing in the constant 1 forces bit 0 on, so the cttz
;; operand can never be zero. The expected asm for both targets is a bare
;; `rep bsfl` with no fallback value for a zero input.
define i32 @or_known_nonzero(i32 %x) {
; X86-LABEL: or_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: or_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%z = or i32 %x, 1
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector variant: only lane 0 is OR-ed with 1 (the other lanes are OR-ed with
;; 0 and may be zero), and only lane 0 is extracted and fed to cttz. The store
;; to %p gives the full vector a second use. The expected asm uses a bare
;; `rep bsfl` with no zero-input fallback.
define i32 @or_known_nonzero_vec(<4 x i32> %x, ptr %p) {
; X86-LABEL: or_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: or_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = or <4 x i32> %x, <i32 1, i32 0, i32 0, i32 0>
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative test: OR of two unconstrained values is zero when both are zero,
;; so the expected asm has to keep the constant 32 as the result for a zero
;; input (a cmov on i686, a preloaded destination on x86_64).
define i32 @or_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: or_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: or_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: orl %esi, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%z = or i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Both select arms are non-zero (%x | 1 and the constant 122), so the selected
;; value is non-zero whichever way %c goes. The expected asm ends in a bare
;; `rep bsfl` with no zero-input fallback.
define i32 @select_known_nonzero(i1 %c, i32 %x) {
; X86-LABEL: select_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $122, %ecx
; X86-NEXT: cmovnel %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: select_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %esi
; X64-NEXT: testb $1, %dil
; X64-NEXT: movl $122, %eax
; X64-NEXT: cmovnel %esi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%y = or i32 %x, 1
%z = select i1 %c, i32 %y, i32 122
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Negative test: the false arm of the select is the constant 0, so the cttz
;; operand may be zero and the expected asm keeps the 32 fallback.
define i32 @select_maybe_zero(i1 %c, i32 %x) {
; X86-LABEL: select_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: cmovnel %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: select_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %esi
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: testb $1, %dil
; X64-NEXT: cmovnel %esi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%y = or i32 %x, 1
%z = select i1 %c, i32 %y, i32 0
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector select between two constant vectors whose lane 0 is non-zero in both
;; arms (4 or 8), while lanes 1-3 of the true arm are zero. Only lane 0 is
;; extracted (constant index), so the cttz operand is non-zero and the expected
;; asm ends in a bare `rep bsfl`. %a2 is unused in this function.
define i32 @extractelt_nonzero_vec(<4 x i32> %a0, ptr %p1, i32 %a2) {
; X86-LABEL: extractelt_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpgtd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: por %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: movd %xmm1, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: extractelt_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm1 = [8,4294967295,4294967295,4294967295]
; X64-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; X64-NEXT: vmovaps %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%cmp = icmp sgt <4 x i32> zeroinitializer, %a0
%sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 0, i32 0, i32 0>, <4 x i32> <i32 8, i32 -1, i32 -1, i32 -1>
store <4 x i32> %sel, ptr %p1
%shl0 = extractelement <4 x i32> %sel, i32 0
%res = call i32 @llvm.cttz.i32(i32 %shl0, i1 0)
ret i32 %res
}
;; Negative counterpart of extractelt_nonzero_vec: the extract index is the
;; runtime value %a2, so any lane may be read, and lanes 1-3 of the true select
;; arm are zero. The expected asm masks the index with 3, reads the lane back
;; from the vector just stored to %p1, and keeps the 32 fallback for a zero
;; input.
define i32 @extractelt_nonzero_vec_fail0(<4 x i32> %a0, ptr %p1, i32 %a2) {
; X86-LABEL: extractelt_nonzero_vec_fail0:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $3, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpgtd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: por %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, (%ecx)
; X86-NEXT: bsfl (%ecx,%eax,4), %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: extractelt_nonzero_vec_fail0:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: vmovaps {{.*#+}} xmm1 = [8,4294967295,4294967295,4294967295]
; X64-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; X64-NEXT: vmovaps %xmm0, (%rdi)
; X64-NEXT: andl $3, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl (%rdi,%rsi,4), %eax
; X64-NEXT: retq
%cmp = icmp sgt <4 x i32> zeroinitializer, %a0
%sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 0, i32 0, i32 0>, <4 x i32> <i32 8, i32 -1, i32 -1, i32 -1>
store <4 x i32> %sel, ptr %p1
%shl0 = extractelement <4 x i32> %sel, i32 %a2
%res = call i32 @llvm.cttz.i32(i32 %shl0, i1 0)
ret i32 %res
}
;; Shift of the odd constant 123 by a variable amount, with no nuw/nsw flags.
;; Bit 0 of the shifted value is set, so it cannot be shifted out by any
;; in-range shift amount and the result is non-zero. The expected asm ends in
;; a bare `rep bsfl` with no zero-input fallback.
define i32 @shl_known_nonzero_1s_bit_set(i32 %x) {
; X86-LABEL: shl_known_nonzero_1s_bit_set:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $123, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_1s_bit_set:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $123, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = shl i32 123, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector variant of shl_known_nonzero_1s_bit_set: only lane 0 of the shifted
;; constant is the odd value 123 (lanes 1-3 are zero), and only lane 0 is
;; extracted for cttz. The store to %p gives the full vector a second use.
;; The expected asm ends in a bare `rep bsfl`.
define i32 @shl_known_nonzero_1s_bit_set_vec(<4 x i32> %x, ptr %p) {
; X86-LABEL: shl_known_nonzero_1s_bit_set_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pslld $23, %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: cvttps2dq %xmm0, %xmm0
; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [123,0,0,0]
; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,0,4294967295,0]
; X86-NEXT: pand %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_1s_bit_set_vec:
; X64: # %bb.0:
; X64-NEXT: vpslld $23, %xmm0, %xmm0
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvttps2dq %xmm0, %xmm0
; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [123,0,0,0]
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = shl <4 x i32> <i32 123, i32 0, i32 0, i32 0>, %x
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; %y is made non-zero by OR-ing in 256, then shifted with the nsw flag. The
;; test relies on a no-signed-wrap shift of a non-zero value staying non-zero;
;; the expected asm ends in a bare `rep bsfl` with no zero-input fallback.
define i32 @shl_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-LABEL: shl_known_nonzero_nsw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_nsw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = shl nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector variant of shl_known_nonzero_nsw: only lane 0 of %yy is OR-ed with
;; 256 (the other lanes are OR-ed with 0), and only lane 0 of the nsw shift is
;; extracted for cttz. The store to %p gives the full vector a second use.
;; The expected asm ends in a bare `rep bsfl`.
define i32 @shl_known_nonzero_nsw_vec(<4 x i32> %x, <4 x i32> %yy, ptr %p) {
; X86-LABEL: shl_known_nonzero_nsw_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: pslld $23, %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: cvttps2dq %xmm0, %xmm0
; X86-NEXT: pmuludq %xmm0, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; X86-NEXT: movdqa %xmm3, (%eax)
; X86-NEXT: movd %xmm1, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_nsw_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT: vpslld $23, %xmm0, %xmm0
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvttps2dq %xmm0, %xmm0
; X64-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%y = or <4 x i32> %yy, <i32 256, i32 0, i32 0, i32 0>
%z = shl nsw <4 x i32> %y, %x
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Same shape as shl_known_nonzero_nsw but with the nuw flag: %y is non-zero
;; (256 OR-ed in) and the test relies on a no-unsigned-wrap shift of a non-zero
;; value staying non-zero. The expected asm ends in a bare `rep bsfl`.
define i32 @shl_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-LABEL: shl_known_nonzero_nuw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_nuw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = shl nuw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector variant of shl_known_nonzero_nuw: only lane 0 of %yy is OR-ed with
;; 256, and only lane 0 of the nuw shift is extracted for cttz. The store to
;; %p gives the full vector a second use. The expected asm ends in a bare
;; `rep bsfl`.
define i32 @shl_known_nonzero_nuw_vec(<4 x i32> %x, <4 x i32> %yy, ptr %p) {
; X86-LABEL: shl_known_nonzero_nuw_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: pslld $23, %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: cvttps2dq %xmm0, %xmm0
; X86-NEXT: pmuludq %xmm0, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; X86-NEXT: movdqa %xmm3, (%eax)
; X86-NEXT: movd %xmm1, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_nuw_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT: vpslld $23, %xmm0, %xmm0
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvttps2dq %xmm0, %xmm0
; X64-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%y = or <4 x i32> %yy, <i32 256, i32 0, i32 0, i32 0>
%z = shl nuw <4 x i32> %y, %x
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative test: the shift carries both nuw and nsw, but the shifted value %y
;; is unconstrained and may itself be zero, so the flags alone prove nothing.
;; The expected asm keeps the 32 fallback for a zero input.
define i32 @shl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: shl_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%z = shl nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Unsigned saturating add of the constant 1: the result is either %x + 1 or
;; the saturated all-ones value, never zero. The expected asm ends in a bare
;; `rep bsfl` with no zero-input fallback.
define i32 @uaddsat_known_nonzero(i32 %x) {
; X86-LABEL: uaddsat_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: incl %eax
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: cmovnel %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: uaddsat_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: incl %edi
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovnel %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 1)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector (16 x i8) variant of uaddsat_known_nonzero: only lane 0 has the
;; constant 1 added (lanes 1-15 add 0), and only lane 0 is extracted,
;; zero-extended to i32 and fed to cttz. The store to %p gives the full vector
;; a second use. The expected asm ends in a bare `rep bsfl`.
define i32 @uaddsat_known_nonzero_vec(<16 x i8> %x, ptr %p) {
; X86-LABEL: uaddsat_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: paddusb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: uaddsat_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vpextrb $0, %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
store <16 x i8> %z, ptr %p
%e = extractelement <16 x i8> %z, i32 0
%ex = zext i8 %e to i32
%r = call i32 @llvm.cttz.i32(i32 %ex, i1 false)
ret i32 %r
}
;; Negative test: saturating add of two unconstrained values is zero when both
;; are zero, so the expected asm keeps the 32 fallback for a zero input.
define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: uaddsat_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: cmovael %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: uaddsat_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: addl %esi, %edi
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: cmovael %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Unsigned max where one operand (%yy = 4 << %y with nuw) is non-zero; %x is
;; unconstrained. The unsigned max is at least %yy, so it is non-zero as well.
;; The expected asm ends in a bare `rep bsfl` with no zero-input fallback.
define i32 @umax_known_nonzero(i32 %x, i32 %y) {
; X86-LABEL: umax_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $4, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: cmoval %eax, %edx
; X86-NEXT: rep bsfl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umax_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl $4, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cmpl %eax, %edi
; X64-NEXT: cmoval %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%yy = shl nuw i32 4, %y
%z = call i32 @llvm.umax.i32(i32 %x, i32 %yy)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector (16 x i8) variant of umax_known_nonzero: %yy is a nuw shift of a
;; constant whose lane 0 is 4 and whose other lanes are 0, so only lane 0 of
;; %yy is non-zero. Lane 0 of umax(%x, %yy) is extracted, zero-extended and fed
;; to cttz. The store to %p gives the full vector a second use. The expected
;; asm ends in a bare `rep bsfl`.
define i32 @umax_known_nonzero_vec(<16 x i8> %x, ptr %p) {
; X86-LABEL: umax_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: psllw $5, %xmm3
; X86-NEXT: pxor %xmm2, %xmm2
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpgtb %xmm3, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm4
; X86-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm4
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: por %xmm4, %xmm1
; X86-NEXT: paddb %xmm3, %xmm3
; X86-NEXT: pxor %xmm4, %xmm4
; X86-NEXT: pcmpgtb %xmm3, %xmm4
; X86-NEXT: movdqa %xmm4, %xmm5
; X86-NEXT: pandn %xmm1, %xmm5
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pand %xmm4, %xmm1
; X86-NEXT: por %xmm5, %xmm1
; X86-NEXT: paddb %xmm3, %xmm3
; X86-NEXT: pcmpgtb %xmm3, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: pandn %xmm1, %xmm3
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pand %xmm2, %xmm1
; X86-NEXT: por %xmm3, %xmm1
; X86-NEXT: pmaxub %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: umax_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpsllw $5, %xmm0, %xmm1
; X64-NEXT: vpmovsxbq {{.*#+}} xmm2 = [4,0]
; X64-NEXT: vpblendvb %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; X64-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; X64-NEXT: vpaddb %xmm3, %xmm3, %xmm3
; X64-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
; X64-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; X64-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm1
; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vpextrb $0, %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%yy = shl nuw <16 x i8> <i8 4, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, %x
%z = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %yy)
store <16 x i8> %z, ptr %p
%e = extractelement <16 x i8> %z, i32 0
%ex = zext i8 %e to i32
%r = call i32 @llvm.cttz.i32(i32 %ex, i1 false)
ret i32 %r
}
;; Negative test: unsigned max of two unconstrained values is zero when both
;; are zero, so the expected asm keeps the 32 fallback for a zero input.
define i32 @umax_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: umax_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: cmoval %ecx, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umax_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: cmoval %edi, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Unsigned min of two values that are each non-zero: %x = 4 << %xx (nuw) and
;; %y = %yy + 4 (nuw nsw). A min returns one of its operands, so the result is
;; non-zero too. The expected asm ends in a bare `rep bsfl`.
define i32 @umin_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: umin_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $4, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: addl $4, %eax
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: umin_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $4, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: addl $4, %esi
; X64-NEXT: cmpl %esi, %eax
; X64-NEXT: cmovbl %eax, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%x = shl nuw i32 4, %xx
%y = add nuw nsw i32 %yy, 4
%z = call i32 @llvm.umin.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector (16 x i8) umin of the raw argument %x and %yy = <4,0,...> << %x (nuw);
;; lane 0 is extracted, zero-extended and fed to cttz. The store to %p gives
;; the full vector a second use.
;; NOTE(review): despite the "known_nonzero" name, lane 0 of the umin is zero
;; whenever lane 0 of %x is zero (%x is unconstrained here), and the expected
;; asm below does keep the 32 fallback for a zero input. Presumably the first
;; umin operand was meant to be a known-non-zero value, as in the scalar
;; umin_known_nonzero test - confirm before renaming or changing the IR (which
;; would need the checks regenerated).
define i32 @umin_known_nonzero_vec(<16 x i8> %x, ptr %p) {
; X86-LABEL: umin_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: psllw $5, %xmm3
; X86-NEXT: pxor %xmm2, %xmm2
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpgtb %xmm3, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm4
; X86-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm4
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: por %xmm4, %xmm1
; X86-NEXT: paddb %xmm3, %xmm3
; X86-NEXT: pxor %xmm4, %xmm4
; X86-NEXT: pcmpgtb %xmm3, %xmm4
; X86-NEXT: movdqa %xmm4, %xmm5
; X86-NEXT: pandn %xmm1, %xmm5
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pand %xmm4, %xmm1
; X86-NEXT: por %xmm5, %xmm1
; X86-NEXT: paddb %xmm3, %xmm3
; X86-NEXT: pcmpgtb %xmm3, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: pandn %xmm1, %xmm3
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pand %xmm2, %xmm1
; X86-NEXT: por %xmm3, %xmm1
; X86-NEXT: pminub %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umin_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpsllw $5, %xmm0, %xmm1
; X64-NEXT: vpmovsxbq {{.*#+}} xmm2 = [4,0]
; X64-NEXT: vpblendvb %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; X64-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; X64-NEXT: vpaddb %xmm3, %xmm3, %xmm3
; X64-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
; X64-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; X64-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm1
; X64-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vpextrb $0, %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%yy = shl nuw <16 x i8> <i8 4, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, %x
%z = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %yy)
store <16 x i8> %z, ptr %p
%e = extractelement <16 x i8> %z, i32 0
%ex = zext i8 %e to i32
%r = call i32 @llvm.cttz.i32(i32 %ex, i1 false)
ret i32 %r
}
;; Negative test: unsigned min of an unconstrained %x and the constant 54 is
;; zero when %x is zero, so the expected asm keeps the 32 fallback. %y is
;; unused in this function.
define i32 @umin_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: umin_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $54, %eax
; X86-NEXT: movl $54, %ecx
; X86-NEXT: cmovbl %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umin_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %ecx
; X64-NEXT: cmovbl %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.umin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Signed min of two values that are each non-zero: %x = 4 << %xx (nuw) and
;; %y = %yy + 4 (nuw nsw). A min returns one of its operands, so the result is
;; non-zero too. The expected asm ends in a bare `rep bsfl`.
define i32 @smin_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: smin_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $4, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: addl $4, %eax
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: smin_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $4, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: addl $4, %esi
; X64-NEXT: cmpl %esi, %eax
; X64-NEXT: cmovll %eax, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%x = shl nuw i32 4, %xx
%y = add nuw nsw i32 %yy, 4
%z = call i32 @llvm.smin.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Signed min against the negative constant -54: the result is at most -54,
;; hence always negative and never zero, even though %x is unconstrained.
;; Despite the "known_zero" name this is a known-NON-zero case; the expected
;; asm ends in a bare `rep bsfl` with no zero-input fallback. %y is unused in
;; this function.
define i32 @smin_known_zero(i32 %x, i32 %y) {
; X86-LABEL: smin_known_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $-54, %eax
; X86-NEXT: movl $-54, %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smin_known_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl $-54, %edi
; X64-NEXT: movl $-54, %eax
; X64-NEXT: cmovll %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector smin against an all-negative constant, so every lane of %z is
;; negative and therefore non-zero. Unlike the cttz tests, the probe here is
;; `ctpop(%z) == 1` (zero-extended to 0/1 per lane). The expected asm reduces
;; it to `(z & (z - 1)) == 0` with no separate "z != 0" check. %y is unused in
;; this function.
define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-LABEL: smin_known_zero_vec:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967242,4294967273,4294967284,4294967295]
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: pcmpgtd %xmm0, %xmm2
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pandn %xmm1, %xmm2
; X86-NEXT: por %xmm2, %xmm0
; X86-NEXT: pcmpeqd %xmm1, %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: pand %xmm1, %xmm0
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpeqd %xmm1, %xmm0
; X86-NEXT: psrld $31, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: smin_known_zero_vec:
; X64: # %bb.0:
; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsrld $31, %xmm0, %xmm0
; X64-NEXT: retq
%z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 -54, i32 -23, i32 -12, i32 -1>)
%r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
%3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
%ret = zext <4 x i1> %3 to <4 x i32>
ret <4 x i32> %ret
}
;; Negative test: signed min against the positive constant 54 is zero when %x
;; is zero, so the expected asm keeps the 32 fallback. %y is unused in this
;; function.
define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: smin_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $54, %eax
; X86-NEXT: movl $54, %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smin_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %ecx
; X64-NEXT: cmovll %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.smin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Per-lane variant: the smin constant is mixed-sign (54, -23, 12, 1), so only
;; lane 1 (-23) forces a negative, non-zero result. Lane 1 is the lane
;; extracted for cttz, and the expected asm ends in a bare `rep bsfl`.
define i32 @smin_known_never_zero_vec_element(<4 x i32> %x) {
; X86-LABEL: smin_known_never_zero_vec_element:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [54,4294967273,12,1]
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: pcmpgtd %xmm0, %xmm2
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pandn %xmm1, %xmm2
; X86-NEXT: por %xmm0, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: smin_known_never_zero_vec_element:
; X64: # %bb.0:
; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpextrd $1, %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 -23, i32 12, i32 1>)
%el = extractelement <4 x i32> %z, i32 1
%r = call i32 @llvm.cttz.i32(i32 %el, i1 false)
ret i32 %r
}
;; Signed max of two values that are each non-zero: %x = 4 << %xx (nuw) and
;; %y = %yy + 4 (nuw nsw). A max returns one of its operands, so the result is
;; non-zero too. The expected asm ends in a bare `rep bsfl`.
define i32 @smax_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: smax_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $4, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: addl $4, %eax
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: cmovgl %edx, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: smax_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $4, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: addl $4, %esi
; X64-NEXT: cmpl %esi, %eax
; X64-NEXT: cmovgl %eax, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%x = shl nuw i32 4, %xx
%y = add nuw nsw i32 %yy, 4
%z = call i32 @llvm.smax.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Signed max against the positive constant 54: the result is at least 54 and
;; therefore never zero, even though %x is unconstrained. Despite the
;; "maybe_zero" name this is a known-non-zero case; the expected asm ends in a
;; bare `rep bsfl` with no zero-input fallback. %y is unused in this function.
define i32 @smax_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: smax_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $55, %eax
; X86-NEXT: movl $54, %ecx
; X86-NEXT: cmovgel %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smax_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl $55, %edi
; X64-NEXT: movl $54, %eax
; X64-NEXT: cmovgel %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = call i32 @llvm.smax.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector smax against an all-positive constant, so every lane of %z is
;; positive and therefore non-zero. The probe is `ctpop(%z) == 1` (zero-extended
;; to 0/1 per lane). The expected asm reduces it to `(z & (z - 1)) == 0` with
;; no separate "z != 0" check. %y is unused in this function.
define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-LABEL: smax_known_zero_vec:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [54,23,12,1]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pcmpgtd %xmm1, %xmm2
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pandn %xmm1, %xmm2
; X86-NEXT: por %xmm2, %xmm0
; X86-NEXT: pcmpeqd %xmm1, %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: pand %xmm1, %xmm0
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpeqd %xmm1, %xmm0
; X86-NEXT: psrld $31, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: smax_known_zero_vec:
; X64: # %bb.0:
; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsrld $31, %xmm0, %xmm0
; X64-NEXT: retq
%z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 23, i32 12, i32 1>)
%r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
%3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
%ret = zext <4 x i1> %3 to <4 x i32>
ret <4 x i32> %ret
}
;; Signed max against -1: the result equals %x for any non-negative %x, so it
;; is zero when %x is zero. The value is therefore only "maybe zero" (the name
;; notwithstanding), and the expected asm keeps the 32 fallback for a zero
;; input. %y is unused in this function.
define i32 @smax_known_zero(i32 %x, i32 %y) {
; X86-LABEL: smax_known_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: cmovnsl %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smax_known_zero:
; X64: # %bb.0:
; X64-NEXT: testl %edi, %edi
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: cmovnsl %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Per-lane variant: the smax constant is mixed-sign (54, -23, -12, -1), so
;; only lane 0 (54) forces a positive, non-zero result. Lane 0 is the lane
;; extracted for cttz, and the expected asm ends in a bare `rep bsfl`.
define i32 @smax_known_never_zero_vec_element(<4 x i32> %x) {
; X86-LABEL: smax_known_never_zero_vec_element:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [54,4294967273,4294967284,4294967295]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pcmpgtd %xmm1, %xmm2
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pandn %xmm1, %xmm2
; X86-NEXT: por %xmm0, %xmm2
; X86-NEXT: movd %xmm2, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: smax_known_never_zero_vec_element:
; X64: # %bb.0:
; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 -23, i32 -12, i32 -1>)
%el = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %el, i1 false)
ret i32 %r
}
;; Rotate-right written out as lshr / shl-by-(32 - y) / or on a value made
;; non-zero by OR-ing in 256. The expected asm shows the pattern selected as a
;; single `rorl`, followed by a bare `rep bsfl` with no zero-input fallback
;; (a rotation only permutes bits, so a non-zero input stays non-zero).
define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotr_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%shr = lshr i32 %x, %y
%sub = sub i32 32, %y
%shl = shl i32 %x, %sub
%z = or i32 %shl, %shr
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Negative test: the same lshr / shl / or rotate pattern as rotr_known_nonzero
;; but on an unconstrained %x, which may be zero. The expected asm still
;; selects `rorl` but keeps the 32 fallback for a zero input.
define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotr_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%shr = lshr i32 %x, %y
%sub = sub i32 32, %y
%shl = shl i32 %x, %sub
%z = or i32 %shl, %shr
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Rotate-right expressed with the funnel-shift intrinsic (both data operands
;; are the same %x) on a value made non-zero by OR-ing in 256. The expected
;; asm matches rotr_known_nonzero: a `rorl` followed by a bare `rep bsfl`.
define i32 @rotr_with_fshr_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotr_with_fshr_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_with_fshr_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the fshr rotate-right test. Only lane 0 of %x has a bit
;; forced on (or with <256, 0, 0, 0>), and only lane 0 of the rotated vector
;; feeds cttz. The whole vector is also stored to %p, giving %z a second use
;; (presumably so the vector rotate is not simply scalarized -- TODO confirm).
;; Expected codegen ends in a bare `rep bsfl` with no `$32` fallback.
define i32 @rotr_with_fshr_known_nonzero_vec(<4 x i32> %xx, <4 x i32> %y, ptr %p) {
; X86-LABEL: rotr_with_fshr_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pxor %xmm3, %xmm3
; X86-NEXT: psubd %xmm1, %xmm3
; X86-NEXT: pslld $23, %xmm3
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; X86-NEXT: cvttps2dq %xmm3, %xmm1
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: por %xmm3, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_with_fshr_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3
; X64-NEXT: vpsubd %xmm1, %xmm3, %xmm1
; X64-NEXT: vpslld $23, %xmm1, %xmm1
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT: vcvttps2dq %xmm1, %xmm1
; X64-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT: vpmuludq %xmm3, %xmm0, %xmm0
; X64-NEXT: vpmuludq %xmm1, %xmm2, %xmm1
; X64-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; X64-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; X64-NEXT: vpor %xmm2, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 256, i32 0, i32 0, i32 0>
%z = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %y)
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative counterpart of the fshr rotate-right test: %x is unconstrained,
;; so the rotated value may be zero and the expected codegen keeps the `$32`
;; zero-input fallback on both targets.
define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotr_with_fshr_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_with_fshr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Rotate-left written as the shl / lshr / or idiom (selected as `roll` in
;; the checks), applied to a value with bit 8 forced on (or 256). Expected
;; codegen is a bare `rep bsfl` with no `$32` zero-input fallback.
define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotl_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%shl = shl i32 %x, %y
%sub = sub i32 32, %y
%shr = lshr i32 %x, %sub
%z = or i32 %shr, %shl
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Rotate-left idiom (shl / lshr / or) on an unconstrained %x. The input may
;; be zero, so the expected codegen keeps the `$32` zero-input fallback on
;; both targets.
define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotl_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%shl = shl i32 %x, %y
%sub = sub i32 32, %y
%shr = lshr i32 %x, %sub
%z = or i32 %shr, %shl
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Rotate-left expressed as llvm.fshl with both data operands equal to %x,
;; where %x has bit 8 forced on (or 256). Expected codegen is a bare
;; `rep bsfl` with no `$32` zero-input fallback on either target.
define i32 @rotl_with_fshl_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotl_with_fshl_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_with_fshl_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the fshl rotate-left test. Only lane 0 of %x has a bit
;; forced on (or with <256, 0, 0, 0>), and only lane 0 of the rotated vector
;; feeds cttz. The whole vector is also stored to %p, giving %z a second use
;; (presumably so the vector rotate is not simply scalarized -- TODO confirm).
;; Expected codegen ends in a bare `rep bsfl` with no `$32` fallback.
define i32 @rotl_with_fshl_known_nonzero_vec(<4 x i32> %xx, <4 x i32> %y, ptr %p) {
; X86-LABEL: rotl_with_fshl_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pslld $23, %xmm1
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: cvttps2dq %xmm1, %xmm1
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: por %xmm3, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_with_fshl_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT: vpslld $23, %xmm1, %xmm1
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT: vcvttps2dq %xmm1, %xmm1
; X64-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X64-NEXT: vpmuludq %xmm3, %xmm0, %xmm0
; X64-NEXT: vpmuludq %xmm1, %xmm2, %xmm1
; X64-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; X64-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; X64-NEXT: vpor %xmm2, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 256, i32 0, i32 0, i32 0>
%z = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %y)
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative counterpart of the fshl rotate-left test: %x is unconstrained,
;; so the rotated value may be zero and the expected codegen keeps the `$32`
;; zero-input fallback on both targets.
define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotl_with_fshl_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_with_fshl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Arithmetic shift right of the constant 2147606891 (0x8001E16B per the
;; `imm` annotation in the checks, i.e. the sign bit is set) by a variable
;; amount. Expected codegen is a bare `rep bsfl` with no `$32` fallback.
define i32 @sra_known_nonzero_sign_bit_set(i32 %x) {
; X86-LABEL: sra_known_nonzero_sign_bit_set:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; X86-NEXT: sarl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_known_nonzero_sign_bit_set:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = ashr i32 2147606891, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the sign-bit-set ashr test. The shift amount is lane 0 of
;; %x splatted to all lanes; the shifted constant vector has its sign bit set
;; only in lane 0 (2147606891), which is the lane extracted for cttz. The
;; full result is also stored to %p. Expected codegen ends in a bare
;; `rep bsfl` with no `$32` fallback.
define i32 @sra_known_nonzero_sign_bit_set_vec(<4 x i32> %x, ptr %p) {
; X86-LABEL: sra_known_nonzero_sign_bit_set_vec:
; X86: # %bb.0:
; X86-NEXT: xorps %xmm1, %xmm1
; X86-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movdqa {{.*#+}} xmm0 = [2147606891,65535,1,0]
; X86-NEXT: psrad %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_known_nonzero_sign_bit_set_vec:
; X64: # %bb.0:
; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [2147606891,65535,1,0]
; X64-NEXT: vpsrad %xmm0, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%xx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
%z = ashr <4 x i32> <i32 2147606891, i32 65535, i32 1, i32 0>, %xx
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; `ashr exact` of a value with bit 8 forced on (or 256) by a variable
;; amount. Expected codegen is a bare `rep bsfl` with no `$32` zero-input
;; fallback on either target.
define i32 @sra_known_nonzero_exact(i32 %x, i32 %yy) {
; X86-LABEL: sra_known_nonzero_exact:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_known_nonzero_exact:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = ashr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the `ashr exact` test. The shift amount is lane 0 of %x
;; splatted to all lanes; only lane 1 of %y has a bit forced on (or with
;; <0, 256, 0, 0>) and lane 1 is the one extracted for cttz. The full result
;; is also stored to %p. Expected codegen ends in a bare `rep bsfl` with no
;; `$32` fallback.
define i32 @sra_known_nonzero_exact_vec(<4 x i32> %x, <4 x i32> %yy, ptr %p) {
; X86-LABEL: sra_known_nonzero_exact_vec:
; X86: # %bb.0:
; X86-NEXT: xorps %xmm2, %xmm2
; X86-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: psrad %xmm2, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_known_nonzero_exact_vec:
; X64: # %bb.0:
; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT: vpsrad %xmm0, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vpextrd $1, %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x.splat = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
%y = or <4 x i32> %yy, <i32 0, i32 256, i32 0, i32 0>
%z = ashr exact <4 x i32> %y, %x.splat
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 1
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; `ashr exact` of an unconstrained %y. The `exact` flag alone says nothing
;; about %y being nonzero, so the expected codegen keeps the `$32`
;; zero-input fallback on both targets.
define i32 @sra_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: sra_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%z = ashr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Logical shift right of the constant 2147606891 (0x8001E16B per the `imm`
;; annotation in the checks, i.e. the top bit is set) by a variable amount.
;; Expected codegen is a bare `rep bsfl` with no `$32` fallback.
define i32 @srl_known_nonzero_sign_bit_set(i32 %x) {
; X86-LABEL: srl_known_nonzero_sign_bit_set:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_known_nonzero_sign_bit_set:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = lshr i32 2147606891, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the top-bit-set lshr test. The shift amount is lane 0 of %x
;; splatted to all lanes; the shifted constant vector has its top bit set
;; only in lane 2 (2147606891), which is the lane extracted for cttz. The
;; full result is also stored to %p. Expected codegen ends in a bare
;; `rep bsfl` with no `$32` fallback.
define i32 @srl_known_nonzero_sign_bit_set_vec(<4 x i32> %x, ptr %p) {
; X86-LABEL: srl_known_nonzero_sign_bit_set_vec:
; X86: # %bb.0:
; X86-NEXT: xorps %xmm1, %xmm1
; X86-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movdqa {{.*#+}} xmm0 = [0,65535,2147606891,0]
; X86-NEXT: psrld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_known_nonzero_sign_bit_set_vec:
; X64: # %bb.0:
; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [0,65535,2147606891,0]
; X64-NEXT: vpsrld %xmm0, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vpextrd $2, %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x.splat = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
%z = lshr <4 x i32> <i32 0, i32 65535, i32 2147606891, i32 0>, %x.splat
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 2
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; `lshr exact` of a value with bit 8 forced on (or 256) by a variable
;; amount. Expected codegen is a bare `rep bsfl` with no `$32` zero-input
;; fallback on either target.
define i32 @srl_known_nonzero_exact(i32 %x, i32 %yy) {
; X86-LABEL: srl_known_nonzero_exact:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_known_nonzero_exact:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = lshr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the `lshr exact` test. The shift amount is lane 0 of %x
;; splatted to all lanes; only lane 3 of %y has a bit forced on (or with
;; <0, 0, 0, 256>) and lane 3 is the one extracted for cttz. The full result
;; is also stored to %p. Expected codegen ends in a bare `rep bsfl` with no
;; `$32` fallback.
define i32 @srl_known_nonzero_exact_vec(<4 x i32> %x, <4 x i32> %yy, ptr %p) {
; X86-LABEL: srl_known_nonzero_exact_vec:
; X86: # %bb.0:
; X86-NEXT: xorps %xmm2, %xmm2
; X86-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: psrld %xmm2, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_known_nonzero_exact_vec:
; X64: # %bb.0:
; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT: vpsrld %xmm0, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vpextrd $3, %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x.splat = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
%y = or <4 x i32> %yy, <i32 0, i32 0, i32 0, i32 256>
%z = lshr exact <4 x i32> %y, %x.splat
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 3
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; `lshr exact` of an unconstrained %y. The `exact` flag alone says nothing
;; about %y being nonzero, so the expected codegen keeps the `$32`
;; zero-input fallback on both targets.
define i32 @srl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: srl_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%z = lshr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; `udiv exact` whose dividend has bit 6 forced on (or 64). Expected codegen
;; is a bare `rep bsfl` on the quotient with no `$32` zero-input fallback on
;; either target.
define i32 @udiv_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: udiv_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl {{[0-9]+}}(%esp)
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: udiv_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or i32 %xx, 64
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the `udiv exact` test. The dividend is OR'd with
;; <64, -1, 0, 0>, the full quotient is stored to %p, and lane 0 (the one
;; with bit 6 forced on) feeds cttz.
;; NOTE(review): despite the `known_nonzero` name, the recorded checks still
;; keep the `$32` zero-input fallback on both targets, unlike the scalar
;; udiv_known_nonzero test. This looks like a recorded missed optimization
;; rather than an intended result -- confirm before relying on it.
define i32 @udiv_known_nonzero_vec(<4 x i32> %xx, <4 x i32> %y, ptr %p) nounwind {
; X86-LABEL: udiv_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; X86-NEXT: movd %xmm2, %ecx
; X86-NEXT: movl $-1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %ecx
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: movd %xmm1, %ecx
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %ecx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
; X86-NEXT: movd %xmm3, %edi
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
; X86-NEXT: movd %xmm3, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %edi
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-NEXT: movd %xmm1, %edi
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %edi
; X86-NEXT: movd %eax, %xmm0
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X86-NEXT: movdqa %xmm2, (%esi)
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: udiv_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpextrd $1, %xmm1, %ecx
; X64-NEXT: vpextrd $1, %xmm0, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %ecx
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: vmovd %xmm1, %esi
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: vmovd %eax, %xmm2
; X64-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; X64-NEXT: vpextrd $2, %xmm1, %ecx
; X64-NEXT: vpextrd $2, %xmm0, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %ecx
; X64-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; X64-NEXT: vpextrd $3, %xmm1, %ecx
; X64-NEXT: vpextrd $3, %xmm0, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %ecx
; X64-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = udiv exact <4 x i32> %x, %y
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; `udiv exact` with an unconstrained dividend. The quotient may be zero
;; (e.g. when %x is zero), so the expected codegen keeps the `$32`
;; zero-input fallback on both targets.
define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: udiv_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl {{[0-9]+}}(%esp)
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: udiv_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movl $32, %ecx
; X64-NEXT: rep bsfl %eax, %ecx
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; `sdiv exact` whose dividend has bit 6 forced on (or 64). Expected codegen
;; is a bare `rep bsfl` on the quotient with no `$32` zero-input fallback on
;; either target.
define i32 @sdiv_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: sdiv_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: cltd
; X86-NEXT: idivl {{[0-9]+}}(%esp)
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sdiv_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or i32 %xx, 64
%z = sdiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the `sdiv exact` test. The dividend is OR'd with
;; <64, -1, 0, 0>, the full quotient is stored to %p, and lane 0 (the one
;; with bit 6 forced on) feeds cttz.
;; NOTE(review): despite the `known_nonzero` name, the recorded checks still
;; keep the `$32` zero-input fallback on both targets, unlike the scalar
;; sdiv_known_nonzero test. This looks like a recorded missed optimization
;; rather than an intended result -- confirm before relying on it.
define i32 @sdiv_known_nonzero_vec(<4 x i32> %xx, <4 x i32> %y, ptr %p) nounwind {
; X86-LABEL: sdiv_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; X86-NEXT: movd %xmm2, %ecx
; X86-NEXT: movl $-1, %eax
; X86-NEXT: cltd
; X86-NEXT: idivl %ecx
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: movd %xmm1, %ecx
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: cltd
; X86-NEXT: idivl %ecx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
; X86-NEXT: movd %xmm3, %edi
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
; X86-NEXT: movd %xmm3, %eax
; X86-NEXT: cltd
; X86-NEXT: idivl %edi
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-NEXT: movd %xmm1, %edi
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: cltd
; X86-NEXT: idivl %edi
; X86-NEXT: movd %eax, %xmm0
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X86-NEXT: movdqa %xmm2, (%esi)
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: sdiv_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpextrd $1, %xmm1, %ecx
; X64-NEXT: vpextrd $1, %xmm0, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %ecx
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: vmovd %xmm1, %esi
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: vmovd %eax, %xmm2
; X64-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; X64-NEXT: vpextrd $2, %xmm1, %ecx
; X64-NEXT: vpextrd $2, %xmm0, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %ecx
; X64-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; X64-NEXT: vpextrd $3, %xmm1, %ecx
; X64-NEXT: vpextrd $3, %xmm0, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %ecx
; X64-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = sdiv exact <4 x i32> %x, %y
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; `sdiv exact` with an unconstrained dividend. The quotient may be zero
;; (e.g. when %x is zero), so the expected codegen keeps the `$32`
;; zero-input fallback on both targets.
define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: sdiv_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cltd
; X86-NEXT: idivl {{[0-9]+}}(%esp)
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sdiv_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movl $32, %ecx
; X64-NEXT: rep bsfl %eax, %ecx
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
%z = sdiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; `add nuw` where one operand has bit 0 forced on (or 1). With no unsigned
;; wrap permitted the sum cannot come back to zero; expected codegen is a
;; bare `rep bsfl` with no `$32` zero-input fallback.
define i32 @add_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: add_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: add_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 1
%z = add nuw i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Same shape as add_known_nonzero but with `nsw` instead of `nuw`. A signed
;; no-wrap add of a nonzero value and an arbitrary %y can still produce zero
;; (e.g. %x = 1, %y = -1), so the expected codegen keeps the `$32` fallback.
define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; X86-LABEL: add_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: add_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 1
%z = add nsw i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector `add nuw` of %xx with the constant <1, 0, 0, 0>. Lane 0 (nonzero
;; addend, no unsigned wrap) is the lane extracted for cttz; the full sum is
;; also stored to %p. Expected codegen ends in a bare `rep bsfl` with no
;; `$32` fallback.
define i32 @add_nuw_known_nonzero_vec(<4 x i32> %xx, ptr %p) {
; X86-LABEL: add_nuw_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: add_nuw_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%z = add nuw <4 x i32> %xx, <i32 1, i32 0, i32 0, i32 0>
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negation (`sub 0, %x`) of `shl nuw nsw 256, %xx`. The no-wrap flags keep
;; the shifted constant nonzero, and the negation of a nonzero value is
;; nonzero. Expected codegen is a bare `rep bsfl` with no `$32` fallback.
define i32 @sub_known_nonzero_neg_case(i32 %xx) {
; X86-LABEL: sub_known_nonzero_neg_case:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: negl %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_known_nonzero_neg_case:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: negl %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl nuw nsw i32 256, %xx
%z = sub i32 0, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Subtraction of two values that provably differ: %x is %xx with bit 6
;; forced on (or 64) and %y is %xx with bit 6 cleared (and -65), so
;; %y - %x cannot be zero. Expected codegen is a bare `rep bsfl` with no
;; `$32` fallback. Note that the %yy parameter is not used by the body.
define i32 @sub_known_nonzero_ne_case(i32 %xx, i32 %yy) {
; X86-LABEL: sub_known_nonzero_ne_case:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $64, %ecx
; X86-NEXT: andl $-65, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_known_nonzero_ne_case:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: andl $-65, %edi
; X64-NEXT: subl %eax, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 64
%y = and i32 %xx, -65
%z = sub i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Subtraction `(%x | 64) - %x`. When %x already has bit 6 set the two
;; operands are equal and the difference is zero, so the expected codegen
;; keeps the `$32` zero-input fallback on both targets.
define i32 @sub_maybe_zero(i32 %x) {
; X86-LABEL: sub_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $64, %ecx
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $64, %ecx
; X64-NEXT: subl %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%y = or i32 %x, 64
%z = sub i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Negation (`sub 0, %x`) of an unconstrained %x. The result is zero when %x
;; is zero, so the expected codegen keeps the `$32` zero-input fallback on
;; both targets.
define i32 @sub_maybe_zero2(i32 %x) {
; X86-LABEL: sub_maybe_zero2:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_maybe_zero2:
; X64: # %bb.0:
; X64-NEXT: negl %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%z = sub i32 0, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector subtraction `<2, 0, 0, 0> - (%xx | <1, 0, 0, 0>)`. In lane 0 the
;; subtrahend has bit 0 set (odd) while the minuend is 2 (even), so the two
;; differ and lane 0 of the result cannot be zero. Lane 0 feeds cttz and the
;; full vector is also stored to %p. Expected codegen ends in a bare
;; `rep bsfl` with no `$32` fallback.
define i32 @sub_known_nonzero_ne_vec(<4 x i32> %xx, ptr %p) {
; X86-LABEL: sub_known_nonzero_ne_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movd {{.*#+}} xmm1 = [2,0,0,0]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: movd %xmm1, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_known_nonzero_ne_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,0]
; X64-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%u = or <4 x i32> %xx, <i32 1, i32 0, i32 0, i32 0>
%z = sub <4 x i32> <i32 2, i32 0, i32 0, i32 0>, %u
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; `mul nsw` of %y (bit 8 forced on via or 256) and an unconstrained %x.
;; NOTE(review): despite the `known_nonzero` name, only one factor is
;; constrained here -- %x may be zero, which makes the product zero -- and
;; the recorded checks accordingly keep the `$32` zero-input fallback on
;; both targets.
define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-LABEL: mul_known_nonzero_nsw:
; X86: # %bb.0:
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_known_nonzero_nsw:
; X64: # %bb.0:
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = mul nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; `mul nuw` of %y (bit 8 forced on via or 256) and an unconstrained %x.
;; NOTE(review): despite the `known_nonzero` name, only one factor is
;; constrained here -- %x may be zero, which makes the product zero -- and
;; the recorded checks accordingly keep the `$32` zero-input fallback on
;; both targets.
define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-LABEL: mul_known_nonzero_nuw:
; X86: # %bb.0:
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_known_nonzero_nuw:
; X64: # %bb.0:
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = mul nuw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; `mul nuw nsw` of two unconstrained values. Either factor may be zero, so
;; the no-wrap flags do not help and the expected codegen keeps the `$32`
;; zero-input fallback on both targets.
define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: mul_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: imull %esi, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%z = mul nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; cttz of a <2 x i16> -> i32 bitcast, where the vector is
;; `shl nuw nsw <256, 256>, %xx` (each i16 lane is a no-wrap shift of a
;; nonzero constant).
;; NOTE(review): despite the `known_nonzero` name, the recorded checks still
;; keep the `$32` zero-input fallback on both targets. This looks like a
;; recorded missed optimization across the bitcast -- confirm.
define i32 @bitcast_known_nonzero(<2 x i16> %xx) {
; X86-LABEL: bitcast_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT: pslld $23, %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: cvttps2dq %xmm0, %xmm0
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,256,u,u,u,u,u,u]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitcast_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: vpslld $23, %xmm0, %xmm0
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvttps2dq %xmm0, %xmm0
; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u]
; X64-NEXT: vmovd %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%x = shl nuw nsw <2 x i16> <i16 256, i16 256>, %xx
%z = bitcast <2 x i16> %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; cttz of a <2 x i16> -> i32 bitcast of an unconstrained argument. The
;; value may be zero, so the expected codegen keeps the `$32` zero-input
;; fallback on both targets.
define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X86-LABEL: bitcast_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitcast_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = bitcast <2 x i16> %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Negative test: an unconstrained float argument is reinterpreted as i32 and
;; passed to cttz. Nothing restricts its bit pattern, and both runs keep the
;; 32 fallback next to the bsf.
define i32 @bitcast_from_float(float %x) {
; X86-LABEL: bitcast_from_float:
; X86: # %bb.0:
; X86-NEXT: bsfl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitcast_from_float:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = bitcast float %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; The `or` with 64 forces bit 6 on, so the operand of llvm.abs is nonzero.
;; The expected output on both runs ends in a plain `rep bsfl` with no
;; fallback constant, i.e. abs of that value is treated as never zero.
define i32 @abs_known_nonzero(i32 %xx) {
; X86-LABEL: abs_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: negl %ecx
; X86-NEXT: cmovsl %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abs_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $64, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: cmovsl %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or i32 %xx, 64
%z = call i32 @llvm.abs.i32(i32 %x, i1 0)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the abs test. The `or` constant is <64, -1, 0, 0>, so lanes
;; 0 and 1 are forced nonzero while lanes 2 and 3 stay unconstrained. The full
;; abs result is stored to %p (giving the vector a second use), but only lane 0
;; is extracted and passed to cttz. The expected output on both runs uses
;; `rep bsfl` with no fallback constant - presumably this exercises per-lane
;; (demanded-elements) never-zero reasoning; confirm against the DAG code.
define i32 @abs_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X86-LABEL: abs_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrad $31, %xmm1
; X86-NEXT: pxor %xmm1, %xmm0
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: abs_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpabsd %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 0)
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative test: llvm.abs is applied directly to the unconstrained argument,
;; so the cttz operand may be zero. Both runs keep the 32 fallback next to the
;; bsf.
define i32 @abs_maybe_zero(i32 %x) {
; X86-LABEL: abs_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: negl %ecx
; X86-NEXT: cmovsl %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abs_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: negl %ecx
; X64-NEXT: cmovsl %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.abs.i32(i32 %x, i1 0)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; The `or` with 64 forces bit 6 on before the byte swap, so the bswap operand
;; is nonzero. The expected output on both runs ends in `rep bsfl` with no
;; fallback constant, i.e. bswap of that value is treated as never zero.
define i32 @bswap_known_nonzero(i32 %xx) {
; X86-LABEL: bswap_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: bswap_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $64, %edi
; X64-NEXT: bswapl %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 64
%z = call i32 @llvm.bswap.i32(i32 %x)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the bswap test. The `or` constant <64, -1, 0, 0> forces only
;; lanes 0 and 1 nonzero; lanes 2 and 3 stay unconstrained. The swapped vector
;; is stored to %p and lane 0 alone is extracted for cttz. The expected output
;; on both runs uses `rep bsfl` with no fallback constant - presumably relying
;; on per-lane never-zero reasoning; confirm against the DAG code.
define i32 @bswap_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X86-LABEL: bswap_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; X86-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; X86-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X86-NEXT: packuswb %xmm2, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: bswap_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,u,u,u,u,11,10,9,8,15,14,13,12]
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %x)
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative test: llvm.bswap is applied directly to the unconstrained argument,
;; so the cttz operand may be zero. Both runs keep the 32 fallback next to the
;; bsf.
define i32 @bswap_maybe_zero(i32 %x) {
; X86-LABEL: bswap_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bswap_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: bswapl %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%z = call i32 @llvm.bswap.i32(i32 %x)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; The `or` with 64 forces bit 6 on before llvm.bitreverse, so the reversed
;; value is nonzero. After the expanded bit-reversal sequence, the expected
;; output on both runs ends in `rep bsfl` with no fallback constant.
define i32 @bitreverse_known_nonzero(i32 %xx) {
; X86-LABEL: bitreverse_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitreverse_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: orl $64, %edi
; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: shrl $4, %edi
; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X64-NEXT: shrl %eax
; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or i32 %xx, 64
%z = call i32 @llvm.bitreverse.i32(i32 %x)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the bitreverse test. The `or` constant <64, -1, 0, 0> forces
;; only lanes 0 and 1 nonzero; lanes 2 and 3 stay unconstrained. The reversed
;; vector is stored to %p and lane 0 alone is extracted for cttz. The expected
;; output on both runs uses `rep bsfl` with no fallback constant - presumably
;; relying on per-lane never-zero reasoning; confirm against the DAG code.
define i32 @bitreverse_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X86-LABEL: bitreverse_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; X86-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; X86-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X86-NEXT: packuswb %xmm2, %xmm0
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlw $4, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-NEXT: pand %xmm2, %xmm1
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: psllw $4, %xmm0
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlw $2, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT: pand %xmm2, %xmm1
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: psllw $2, %xmm0
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlw $1, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X86-NEXT: pand %xmm2, %xmm1
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: paddb %xmm0, %xmm0
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitreverse_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,u,u,u,u,11,10,9,8,15,14,13,12]
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT: vpand %xmm1, %xmm0, %xmm2
; X64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; X64-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; X64-NEXT: vpsrlw $4, %xmm0, %xmm0
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; X64-NEXT: vpshufb %xmm0, %xmm1, %xmm0
; X64-NEXT: vpor %xmm0, %xmm2, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %x)
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative test: llvm.bitreverse is applied directly to the unconstrained
;; argument, so the cttz operand may be zero. Both runs keep the 32 fallback
;; next to the bsf.
define i32 @bitreverse_maybe_zero(i32 %x) {
; X86-LABEL: bitreverse_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitreverse_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: shrl $4, %edi
; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X64-NEXT: shrl %eax
; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: leal (%rax,%rcx,2), %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.bitreverse.i32(i32 %x)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; The `or` with 64 forces bit 6 on, so the population count of the operand is
;; at least one. After the expanded popcount sequence, the expected output on
;; both runs ends in `rep bsfl` with no fallback constant.
define i32 @ctpop_known_nonzero(i32 %xx) {
; X86-LABEL: ctpop_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: addl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
; X86-NEXT: shrl $24, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctpop_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: shrl %edi
; X64-NEXT: andl $1431655765, %edi # imm = 0x55555555
; X64-NEXT: subl %edi, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: addl %ecx, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: shrl $4, %ecx
; X64-NEXT: addl %eax, %ecx
; X64-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X64-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
; X64-NEXT: shrl $24, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or i32 %xx, 64
%z = call i32 @llvm.ctpop.i32(i32 %x)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector form of the ctpop test. The `or` constant <64, -1, 0, 0> forces only
;; lanes 0 and 1 nonzero; lanes 2 and 3 stay unconstrained. The popcount vector
;; is stored to %p and lane 0 alone is extracted for cttz. The expected output
;; on both runs uses `rep bsfl` with no fallback constant - presumably relying
;; on per-lane never-zero reasoning; confirm against the DAG code.
define i32 @ctpop_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X86-LABEL: ctpop_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlw $1, %xmm1
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: psubb %xmm1, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pand %xmm1, %xmm2
; X86-NEXT: psrlw $2, %xmm0
; X86-NEXT: pand %xmm1, %xmm0
; X86-NEXT: paddb %xmm2, %xmm0
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlw $4, %xmm1
; X86-NEXT: paddb %xmm0, %xmm1
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: pxor %xmm0, %xmm0
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; X86-NEXT: psadbw %xmm0, %xmm2
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: psadbw %xmm0, %xmm1
; X86-NEXT: packuswb %xmm2, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: movd %xmm1, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctpop_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT: vpand %xmm1, %xmm0, %xmm2
; X64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X64-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; X64-NEXT: vpsrlw $4, %xmm0, %xmm0
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; X64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; X64-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
store <4 x i32> %z, ptr %p
%e = extractelement <4 x i32> %z, i32 0
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative test: llvm.ctpop is applied directly to the unconstrained argument,
;; so the count (and hence the cttz operand) may be zero. Both runs keep the
;; 32 fallback next to the bsf.
define i32 @ctpop_maybe_zero(i32 %x) {
; X86-LABEL: ctpop_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: addl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
; X86-NEXT: shrl $24, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctpop_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: subl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
; X64-NEXT: addl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $4, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: imull $16843009, %eax, %ecx # imm = 0x1010101
; X64-NEXT: shrl $24, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.ctpop.i32(i32 %x)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; The i16 value is the constant 256 shifted left by %xx with nuw/nsw, then
;; zero-extended to i32 before cttz. The expected output on both runs performs
;; the shift directly in a 32-bit register and ends in `rep bsfl` with no
;; fallback constant, i.e. the zext operand is treated as never zero.
define i32 @zext_known_nonzero(i16 %xx) {
; X86-LABEL: zext_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: zext_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl nuw nsw i16 256, %xx
%z = zext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Negative test: the unconstrained i16 argument is zero-extended directly, so
;; the cttz operand may be zero. Both runs keep the 32 fallback next to the
;; bsf.
define i32 @zext_maybe_zero(i16 %x) {
; X86-LABEL: zext_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: zext_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = zext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Same input as zext_known_nonzero (256 shifted left by %xx with nuw/nsw) but
;; sign-extended to i32 before cttz. The expected output on both runs matches
;; the zext variant: a 32-bit shift followed by `rep bsfl` with no fallback
;; constant.
define i32 @sext_known_nonzero(i16 %xx) {
; X86-LABEL: sext_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl nuw nsw i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector sext test where only one lane is nonzero. The shl source constant has
;; a 1 in lane 2 and 0 everywhere else; the shuffle mask <3,2,1,0,4,5,6,7>
;; moves that lane to position 1, and after the sext (whose full result is
;; stored to %p) it is lane 1 that is extracted for cttz. The expected output
;; on both runs ends in `rep bsfl` with no fallback constant - presumably
;; relying on tracking the demanded lane through the shuffle and sext; confirm
;; against the DAG code.
define i32 @sext_known_nonzero_vec(<8 x i16> %xx, ptr %p) {
; X86-LABEL: sext_known_nonzero_vec:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT: pslld $23, %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: cvttps2dq %xmm0, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,2,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,4]
; X86-NEXT: psrad $16, %xmm0
; X86-NEXT: movdqa %xmm1, 16(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_vec:
; X64: # %bb.0:
; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: vpslld $23, %xmm0, %xmm0
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvttps2dq %xmm0, %xmm0
; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5,6,7]
; X64-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; X64-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm1, 16(%rdi)
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vpextrd $1, %xmm0, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl <8 x i16> <i16 0, i16 0, i16 1, i16 0, i16 0, i16 0, i16 0, i16 0>, %xx
%s = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
%z = sext <8 x i16> %s to <8 x i32>
store <8 x i32> %z, ptr %p
%e = extractelement <8 x i32> %z, i32 1
%r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
ret i32 %r
}
;; Negative test: the unconstrained i16 argument is sign-extended directly, so
;; the cttz operand may be zero. Both runs keep the 32 fallback next to the
;; bsf.
define i32 @sext_maybe_zero(i16 %x) {
; X86-LABEL: sext_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movswl %di, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Demanded-lane test through a vector zext. The select picks per lane between
;; <1, 0, 0, 0> and <2, -1, -1, -1>, so lane 0 is always 1 or 2 (nonzero) while
;; lanes 1-3 may be zero. The vector is zero-extended to <4 x i64>, stored to
;; %p, and only lane 0 goes to the 64-bit cttz, whose result is truncated to
;; i32.
;; Expected output: the x86_64 run ends in `rep bsfq` with no fallback
;; constant. The i686 run does a bsfl on the low 32 bits and still materializes
;; a 64 fallback for a zero input.
define i32 @test_zext_demanded_elts(<4 x i32> %a0, ptr %p) {
; X86-LABEL: test_zext_demanded_elts:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pxor %xmm2, %xmm2
; X86-NEXT: pcmpgtd %xmm0, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm0
; X86-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-NEXT: por %xmm0, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm0
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: movd %xmm2, %ecx
; X86-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X86-NEXT: movdqa %xmm2, 16(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $64, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_zext_demanded_elts:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm1 = [2,4294967295,4294967295,4294967295]
; X64-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; X64-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X64-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-NEXT: vmovaps %xmm2, 16(%rdi)
; X64-NEXT: vmovdqa %xmm1, (%rdi)
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: rep bsfq %rax, %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%cmp = icmp sgt <4 x i32> zeroinitializer, %a0
%sel = select <4 x i1> %cmp, <4 x i32> <i32 1, i32 0, i32 0, i32 0>, <4 x i32> <i32 2, i32 -1, i32 -1, i32 -1>
%ext = zext <4 x i32> %sel to <4 x i64>
store <4 x i64> %ext, ptr %p
%lane0 = extractelement <4 x i64> %ext, i32 0
%tz = call i64 @llvm.cttz.i64(i64 %lane0, i1 false)
%res = trunc i64 %tz to i32
ret i32 %res
}
;; Demanded-lane test through a vector sext; same select as the zext variant
;; (lane 0 is always 1 or 2, lanes 1-3 may be zero). The vector is
;; sign-extended to <4 x i64>, stored to %p, and only lane 0 goes to the 64-bit
;; cttz, whose result is truncated to i32.
;; Expected output: the x86_64 run ends in `rep bsfq` with no fallback
;; constant. The i686 run splits the i64 into halves: it counts both the low
;; word and the sign word, adds 32 to the latter, and selects on whether the
;; low word is nonzero.
define i32 @test_sext_demanded_elts(<4 x i32> %a0, ptr %p) {
; X86-LABEL: test_sext_demanded_elts:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pxor %xmm2, %xmm2
; X86-NEXT: pcmpgtd %xmm0, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm0
; X86-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-NEXT: por %xmm0, %xmm2
; X86-NEXT: pcmpgtd %xmm2, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,2,3,3]
; X86-NEXT: movd %xmm2, %ecx
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: movdqa %xmm0, 16(%eax)
; X86-NEXT: movdqa %xmm2, (%eax)
; X86-NEXT: movd %xmm1, %eax
; X86-NEXT: rep bsfl %ecx, %edx
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: addl $32, %eax
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: cmovnel %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_sext_demanded_elts:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm1 = [2,4294967295,4294967295,4294967295]
; X64-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; X64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NEXT: vpmovsxdq %xmm1, %xmm1
; X64-NEXT: vpmovsxdq %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
; X64-NEXT: vmovdqa %xmm1, 16(%rdi)
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: rep bsfq %rax, %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%cmp = icmp sgt <4 x i32> zeroinitializer, %a0
%sel = select <4 x i1> %cmp, <4 x i32> <i32 1, i32 0, i32 0, i32 0>, <4 x i32> <i32 2, i32 -1, i32 -1, i32 -1>
%ext = sext <4 x i32> %sel to <4 x i64>
store <4 x i64> %ext, ptr %p
%lane0 = extractelement <4 x i64> %ext, i32 0
%tz = call i64 @llvm.cttz.i64(i64 %lane0, i1 false)
%res = trunc i64 %tz to i32
ret i32 %res
}