[X86] combineCMP - fold cmpEQ/NE(TRUNC(X),0) -> cmpEQ/NE(X,0)
If we are truncating from an i32 source before comparing the result against zero, then see if we can directly compare the source value against zero.
If the upper (truncated) bits are known to be zero then we can compare the source value itself against zero, hopefully increasing the chances of folding the compare into an EFLAGS result of the source's operation.
Fixes PR49028.
Differential Revision: https://reviews.llvm.org/D100491
GitOrigin-RevId: 9d57a77b81265c650c75511115067588790d7ac0
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ee4cdbc..8cf0bef 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -48821,15 +48821,28 @@
}
}
- // Look for a truncate with a single use.
- if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse())
+ // Look for a truncate.
+ if (Op.getOpcode() != ISD::TRUNCATE)
return SDValue();
+ SDValue Trunc = Op;
Op = Op.getOperand(0);
- // Arithmetic op can only have one use.
- if (!Op.hasOneUse())
- return SDValue();
+ // See if we can compare with zero against the truncation source,
+ // which should help using the Z flag from many ops. Only do this for
+ // i32 truncated op to prevent partial-reg compares of promoted ops.
+ EVT OpVT = Op.getValueType();
+ APInt UpperBits =
+ APInt::getBitsSetFrom(OpVT.getSizeInBits(), VT.getSizeInBits());
+ if (OpVT == MVT::i32 && DAG.MaskedValueIsZero(Op, UpperBits) &&
+ onlyZeroFlagUsed(SDValue(N, 0))) {
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, OpVT));
+ }
+
+ // After this the truncate and arithmetic op must have a single use.
+ if (!Trunc.hasOneUse() || !Op.hasOneUse())
+ return SDValue();
unsigned NewOpc;
switch (Op.getOpcode()) {
diff --git a/test/CodeGen/X86/and-with-overflow.ll b/test/CodeGen/X86/and-with-overflow.ll
index d83f91a..05b1db4 100644
--- a/test/CodeGen/X86/and-with-overflow.ll
+++ b/test/CodeGen/X86/and-with-overflow.ll
@@ -75,7 +75,6 @@
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $-17, %eax
-; X64-NEXT: testw %ax, %ax
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
diff --git a/test/CodeGen/X86/pr34137.ll b/test/CodeGen/X86/pr34137.ll
index f24b96f..f374515 100644
--- a/test/CodeGen/X86/pr34137.ll
+++ b/test/CodeGen/X86/pr34137.ll
@@ -11,12 +11,10 @@
; CHECK-NEXT: movzwl {{.*}}(%rip), %eax
; CHECK-NEXT: movzwl {{.*}}(%rip), %ecx
; CHECK-NEXT: andl %eax, %ecx
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: andl %ecx, %edx
-; CHECK-NEXT: movzwl %dx, %edx
-; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: testw %cx, %ax
+; CHECK-NEXT: andl %eax, %ecx
+; CHECK-NEXT: movzwl %cx, %ecx
+; CHECK-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: sete %dl
; CHECK-NEXT: andl %eax, %edx
; CHECK-NEXT: movq %rdx, {{.*}}(%rip)
diff --git a/test/CodeGen/X86/pr49028.ll b/test/CodeGen/X86/pr49028.ll
index 9b67390..ecaefc7 100644
--- a/test/CodeGen/X86/pr49028.ll
+++ b/test/CodeGen/X86/pr49028.ll
@@ -8,7 +8,6 @@
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl %eax
-; X86-NEXT: testw %ax, %ax
; X86-NEXT: sete (%ecx)
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
@@ -17,7 +16,6 @@
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl %eax
-; X64-NEXT: testw %ax, %ax
; X64-NEXT: sete (%rsi)
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
diff --git a/test/CodeGen/X86/setcc-logic.ll b/test/CodeGen/X86/setcc-logic.ll
index 743d0c1..c82a7df 100644
--- a/test/CodeGen/X86/setcc-logic.ll
+++ b/test/CodeGen/X86/setcc-logic.ll
@@ -646,12 +646,18 @@
}
define i1 @or_cmp_eq_i16(i16 zeroext %x, i16 zeroext %y) {
-; CHECK-LABEL: or_cmp_eq_i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testw %si, %di
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: retq
+; NOBMI-LABEL: or_cmp_eq_i16:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: notl %edi
+; NOBMI-NEXT: testl %esi, %edi
+; NOBMI-NEXT: sete %al
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: or_cmp_eq_i16:
+; BMI: # %bb.0:
+; BMI-NEXT: andnl %esi, %edi, %eax
+; BMI-NEXT: sete %al
+; BMI-NEXT: retq
%o = or i16 %x, %y
%c = icmp eq i16 %x, %o
ret i1 %c
diff --git a/test/CodeGen/X86/vector-compare-any_of.ll b/test/CodeGen/X86/vector-compare-any_of.ll
index b942f31..ee7e5f9 100644
--- a/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/test/CodeGen/X86/vector-compare-any_of.ll
@@ -887,7 +887,7 @@
; SSE-NEXT: cmpneqps %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
@@ -994,7 +994,7 @@
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
@@ -1002,7 +1002,7 @@
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: testw %ax, %ax
+; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
@@ -1119,7 +1119,7 @@
; SSE-NEXT: pcmpeqd %xmm0, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
diff --git a/test/CodeGen/X86/vector-reduce-and-cmp.ll b/test/CodeGen/X86/vector-reduce-and-cmp.ll
index c890b85..124c7df 100644
--- a/test/CodeGen/X86/vector-reduce-and-cmp.ll
+++ b/test/CodeGen/X86/vector-reduce-and-cmp.ll
@@ -470,7 +470,7 @@
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -479,7 +479,7 @@
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: testw %ax, %ax
+; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: sete %al
; AVX-NEXT: retq
%1 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a0)
@@ -496,7 +496,7 @@
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
@@ -507,7 +507,7 @@
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: testw %ax, %ax
+; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
%1 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a0)
@@ -526,7 +526,7 @@
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -539,7 +539,7 @@
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: testw %ax, %ax
+; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: sete %al
; AVX-NEXT: retq
%1 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a0)
@@ -559,7 +559,7 @@
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
@@ -574,7 +574,7 @@
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: testw %ax, %ax
+; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -590,7 +590,7 @@
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -606,7 +606,7 @@
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: testw %ax, %ax
+; AVX512-NEXT: testl %eax, %eax
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -629,7 +629,7 @@
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -645,7 +645,7 @@
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: testw %ax, %ax
+; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -662,7 +662,7 @@
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -680,7 +680,7 @@
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: testw %ax, %ax
+; AVX512-NEXT: testl %eax, %eax
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -707,7 +707,7 @@
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
@@ -725,7 +725,7 @@
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: testw %ax, %ax
+; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -744,7 +744,7 @@
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -763,7 +763,7 @@
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: testw %ax, %ax
+; AVX512-NEXT: testl %eax, %eax
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -810,7 +810,7 @@
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
@@ -821,7 +821,7 @@
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
%1 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a0)
@@ -841,7 +841,7 @@
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -854,7 +854,7 @@
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: sete %al
; AVX-NEXT: retq
%1 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a0)
@@ -876,7 +876,7 @@
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
@@ -891,7 +891,7 @@
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
%1 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a0)
@@ -914,7 +914,7 @@
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -931,7 +931,7 @@
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: testb %al, %al
+; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -949,7 +949,7 @@
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: testb %al, %al
+; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -967,7 +967,7 @@
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: testb %al, %al
+; AVX512-NEXT: testl %eax, %eax
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -993,7 +993,7 @@
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
@@ -1011,7 +1011,7 @@
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: testb %al, %al
+; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1030,7 +1030,7 @@
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: testb %al, %al
+; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1080,7 +1080,7 @@
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
diff --git a/test/CodeGen/X86/vector-reduce-or-bool.ll b/test/CodeGen/X86/vector-reduce-or-bool.ll
index f7e6e8e..fea4d6a 100644
--- a/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -1139,7 +1139,7 @@
; SSE-NEXT: pcmpeqd %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;