[SelectionDAG] Combine range metadata when loads are CSEd. (#146026)
When a load is CSEd with an existing load that has different range
metadata, clear the range metadata on the existing load.
This is conservative; alternatively, we could calculate new range
metadata using MDNode::getMostGenericRange. Without a test case, I wasn't
sure it was worth it.
MDNode::getMostGenericRange takes a non-const MDNode*, but all of
SelectionDAG uses const MDNode*. A const_cast will need to be used
somewhere, or we need to make the codebase consistent about whether
MDNode pointers should be const or not.
I'm sure this isn't the only place that needs to be updated to handle
the CSE.
Fixes #145363.
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 92da4ef..a3675ee 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1497,6 +1497,14 @@
MMO->refineAlignment(NewMMO);
}
+ void refineRanges(const MachineMemOperand *NewMMO) {
+ // If this node has range metadata that is different than NewMMO, clear the
+ // range metadata.
+ // FIXME: Union the ranges instead?
+ if (getRanges() && getRanges() != NewMMO->getRanges())
+ MMO->clearRanges();
+ }
+
const SDValue &getChain() const { return getOperand(0); }
const SDValue &getBasePtr() const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 790d297..fe9a6ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1275,6 +1275,8 @@
// to replace the dead one with the existing one. This can cause
// recursive merging of other unrelated nodes down the line.
Existing->intersectFlagsWith(N->getFlags());
+ if (auto *MemNode = dyn_cast<MemSDNode>(Existing))
+ MemNode->refineRanges(cast<MemSDNode>(N)->getMemOperand());
ReplaceAllUsesWith(N, Existing);
// N is now dead. Inform the listeners and delete it.
@@ -9110,8 +9112,9 @@
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
ID.AddInteger(MMO->getFlags());
void* IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ if (auto *E = cast_or_null<AtomicSDNode>(FindNodeOrInsertPos(ID, dl, IP))) {
+ E->refineAlignment(MMO);
+ E->refineRanges(MMO);
return SDValue(E, 0);
}
@@ -9402,8 +9405,9 @@
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
- cast<LoadSDNode>(E)->refineAlignment(MMO);
+ if (auto *E = cast_or_null<LoadSDNode>(FindNodeOrInsertPos(ID, dl, IP))) {
+ E->refineAlignment(MMO);
+ E->refineRanges(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
@@ -9623,8 +9627,9 @@
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
- cast<VPLoadSDNode>(E)->refineAlignment(MMO);
+ if (auto *E = cast_or_null<VPLoadSDNode>(FindNodeOrInsertPos(ID, dl, IP))) {
+ E->refineAlignment(MMO);
+ E->refineRanges(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
diff --git a/llvm/test/CodeGen/RISCV/pr145363.ll b/llvm/test/CodeGen/RISCV/pr145363.ll
new file mode 100644
index 0000000..14bb4e4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr145363.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+; These two loads will be CSEd, so we need to conservatively combine their
+; range metadata. The final assembly should not contain an OR.
+define i32 @f(ptr %p) {
+; CHECK-LABEL: f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a0, 0(a0)
+; CHECK-NEXT: lui a1, 294471
+; CHECK-NEXT: addi a1, a1, 1064
+; CHECK-NEXT: addw a0, a0, a1
+; CHECK-NEXT: ret
+ %load = load i32, ptr %p, align 4, !range !0
+ %load2 = load i32, ptr %p, align 4
+ %add = add i32 1206154280, %load2
+ ret i32 %add
+}
+
+; The mul and getelementptr will get removed in DAGCombine causing the loads
+; to CSE after they are created.
+define i32 @test(ptr %p, i32 %x, ptr %q) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a0)
+; CHECK-NEXT: lui a0, 294471
+; CHECK-NEXT: addi a0, a0, 1064
+; CHECK-NEXT: addw a0, a1, a0
+; CHECK-NEXT: sw a1, 0(a2)
+; CHECK-NEXT: ret
+ %load = load i32, ptr %p, align 4, !range !0
+ %mul = mul i32 0, %x
+ %a = getelementptr i32, ptr %p, i32 %mul
+ %load2 = load i32, ptr %a, align 4
+ %add = add i32 1206154280, %load2
+ store i32 %load, ptr %q
+ ret i32 %add
+}
+
+!0 = !{i32 1, i32 2, i32 3, i32 4}