[RISCV] Relax a one use restriction in performSRACombine
When folding (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C),
ignore the use count on the (shl X, 32).
The sext_inreg after the transform is free. So we're only making
2 new instructions, the add and the shl. So we only need to be
concerned with replacing the original sra+add. The original shl
can have other uses. This helps if there are multiple different
constants being added to the same shl.
GitOrigin-RevId: a2de12c987339f228f7fa412222f7e7746e220ed
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 926aaad..227f2a1 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8892,11 +8892,17 @@
}
// Look for a shift left by 32.
- if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse() ||
- !isa<ConstantSDNode>(Shl.getOperand(1)) ||
+ if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
Shl.getConstantOperandVal(1) != 32)
return SDValue();
+ // If we didn't look through an add/sub, then the shl should have one use.
+ // If we did look through an add/sub, the sext_inreg we create is free so
+ // we're only creating 2 new instructions. It's enough to only remove the
+ // original sra+add/sub.
+ if (!AddC && !Shl.hasOneUse())
+ return SDValue();
+
SDLoc DL(N);
SDValue In = Shl.getOperand(0);
diff --git a/test/CodeGen/RISCV/rv64i-shift-sext.ll b/test/CodeGen/RISCV/rv64i-shift-sext.ll
index 9fc3902..ad1df83 100644
--- a/test/CodeGen/RISCV/rv64i-shift-sext.ll
+++ b/test/CodeGen/RISCV/rv64i-shift-sext.ll
@@ -170,3 +170,29 @@
%3 = ashr i32 %2, 15
ret i32 %3
}
+
+define i8 @test13(i8* %0, i64 %1) {
+; RV64I-LABEL: test13:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 1
+; RV64I-NEXT: subw a2, a2, a1
+; RV64I-NEXT: add a2, a0, a2
+; RV64I-NEXT: lb a2, 0(a2)
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: subw a1, a3, a1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lb a0, 0(a0)
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: ret
+ %3 = mul i64 %1, -4294967296
+ %4 = add i64 %3, 4294967296 ; 1 << 32
+ %5 = ashr exact i64 %4, 32
+ %6 = getelementptr inbounds i8, i8* %0, i64 %5
+ %7 = load i8, i8* %6, align 4
+ %8 = add i64 %3, 8589934592 ; 2 << 32
+ %9 = ashr exact i64 %8, 32
+ %10 = getelementptr inbounds i8, i8* %0, i64 %9
+ %11 = load i8, i8* %10, align 4
+ %12 = add i8 %7, %11
+ ret i8 %12
+}