[InstCombine] Convert atomicrmws to xchg or store where legal

Implement two more transforms of atomicrmw:
1) We can convert an atomicrmw which produces a known value in memory into an xchg instead.
2) We can convert an atomicrmw xchg w/o users into a store for some orderings.

Differential Revision: https://reviews.llvm.org/D58290



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354170 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index 37c4397..58da7eb 100644
--- a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -47,25 +47,73 @@
       return false;
   }
 }
+
+/// Return true if the given instruction always produces a value in memory
+/// equivalent to its value operand.
+bool isSaturating(AtomicRMWInst& RMWI) {
+  auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+  if(!C)
+    return false;
+
+  AtomicRMWInst::BinOp Op = RMWI.getOperation();
+  switch(Op) {
+  default:
+    // TODO: fadd, fsub w/Nan
+    // Note: We avoid listing xchg to prevent transform cycles.
+    return false;
+  case AtomicRMWInst::Or:
+    return C->isAllOnesValue();
+  case AtomicRMWInst::And:
+    return C->isZero();
+  case AtomicRMWInst::Min:
+    return C->isMinValue(true);
+  case AtomicRMWInst::Max:
+    return C->isMaxValue(true);
+  case AtomicRMWInst::UMin:
+    return C->isMinValue(false);
+  case AtomicRMWInst::UMax:
+    return C->isMaxValue(false);
+  };
+}
 }
 
-
 Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
-  // TODO: Any atomicrmw op which produces a known result in memory can be
-  // replaced w/an atomicrmw xchg. (see getBinOpAbsorber)
-  
-  // TODO: Any atomicrmw xchg with no uses can be converted to a atomic store
-  // if the ordering is compatible.
-  
-  if (!isIdempotentRMW(RMWI))
-    return nullptr;
 
   // Volatile RMWs perform a load and a store, we cannot replace this by just a
-  // load. We chose not to canonicalize out of general paranoia about user
-  // expectations around volatile. 
+  // load or just a store. We chose not to canonicalize out of general paranoia
+  // about user expectations around volatile. 
   if (RMWI.isVolatile())
     return nullptr;
 
+  // Any atomicrmw op which produces a known result in memory can be
+  // replaced w/an atomicrmw xchg.
+  if (isSaturating(RMWI)) {
+    RMWI.setOperation(AtomicRMWInst::Xchg);
+    return &RMWI;
+  }
+
+  AtomicOrdering Ordering = RMWI.getOrdering();
+  assert(Ordering != AtomicOrdering::NotAtomic &&
+         Ordering != AtomicOrdering::Unordered &&
+         "AtomicRMWs don't make sense with Unordered or NotAtomic");
+
+  // Any atomicrmw xchg with no uses can be converted to an atomic store if the
+  // ordering is compatible. 
+  if (RMWI.getOperation() == AtomicRMWInst::Xchg &&
+      RMWI.use_empty()) {
+    if (Ordering != AtomicOrdering::Release &&
+        Ordering != AtomicOrdering::Monotonic)
+      return nullptr;
+    auto *SI = new StoreInst(RMWI.getValOperand(),
+                             RMWI.getPointerOperand(), &RMWI);
+    SI->setAtomic(Ordering, RMWI.getSyncScopeID());
+    SI->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+    return eraseInstFromFunction(RMWI);
+  }
+  
+  if (!isIdempotentRMW(RMWI))
+    return nullptr;
+
   // We chose to canonicalize all idempotent operations to an single
   // operation code and constant.  This makes it easier for the rest of the
   // optimizer to match easily.  The choice of or w/zero is arbitrary.
@@ -77,10 +125,6 @@
   }
 
   // Check if the required ordering is compatible with an atomic load.
-  AtomicOrdering Ordering = RMWI.getOrdering();
-  assert(Ordering != AtomicOrdering::NotAtomic &&
-         Ordering != AtomicOrdering::Unordered &&
-         "AtomicRMWs don't make sense with Unordered or NotAtomic");
   if (Ordering != AtomicOrdering::Acquire &&
       Ordering != AtomicOrdering::Monotonic)
     return nullptr;
diff --git a/test/Transforms/InstCombine/atomicrmw.ll b/test/Transforms/InstCombine/atomicrmw.ll
index e8b0cf5..c37a626 100644
--- a/test/Transforms/InstCombine/atomicrmw.ll
+++ b/test/Transforms/InstCombine/atomicrmw.ll
@@ -137,22 +137,22 @@
 
 
 ; CHECK-LABEL: sat_or_allones
-; CHECK-NEXT: %res = atomicrmw add i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
 ; CHECK-NEXT: ret i32 %res
 define i32 @sat_or_allones(i32* %addr) {
-  %res = atomicrmw add i32* %addr, i32 -1 monotonic
+  %res = atomicrmw or i32* %addr, i32 -1 monotonic
   ret i32 %res
 }
 
 ; CHECK-LABEL: sat_and_zero
-; CHECK-NEXT: %res = atomicrmw and i32* %addr, i32 0 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 0 monotonic
 ; CHECK-NEXT: ret i32 %res
 define i32 @sat_and_zero(i32* %addr) {
   %res = atomicrmw and i32* %addr, i32 0 monotonic
   ret i32 %res
 }
 ; CHECK-LABEL: sat_umin_uint_min
-; CHECK-NEXT: %res = atomicrmw umin i32* %addr, i32 0 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 0 monotonic
 ; CHECK-NEXT: ret i32 %res
 define i32 @sat_umin_uint_min(i32* %addr) {
   %res = atomicrmw umin i32* %addr, i32 0 monotonic
@@ -160,7 +160,7 @@
 }
 
 ; CHECK-LABEL: sat_umax_uint_max
-; CHECK-NEXT: %res = atomicrmw umax i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
 ; CHECK-NEXT: ret i32 %res
 define i32 @sat_umax_uint_max(i32* %addr) {
   %res = atomicrmw umax i32* %addr, i32 -1 monotonic
@@ -168,7 +168,7 @@
 }
 
 ; CHECK-LABEL: sat_min_smin_char
-; CHECK-NEXT: %res = atomicrmw min i8* %addr, i8 -128 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i8* %addr, i8 -128 monotonic
 ; CHECK-NEXT: ret i8 %res
 define i8 @sat_min_smin_char(i8* %addr) {
   %res = atomicrmw min i8* %addr, i8 -128 monotonic
@@ -176,7 +176,7 @@
 }
 
 ; CHECK-LABEL: sat_max_smax_char
-; CHECK-NEXT: %res = atomicrmw max i8* %addr, i8 127 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i8* %addr, i8 127 monotonic
 ; CHECK-NEXT: ret i8 %res
 define i8 @sat_max_smax_char(i8* %addr) {
   %res = atomicrmw max i8* %addr, i8 127 monotonic
@@ -184,7 +184,7 @@
 }
 
 ; CHECK-LABEL: xchg_unused_monotonic
-; CHECK-NEXT: atomicrmw xchg i32* %addr, i32 0 monotonic
+; CHECK-NEXT: store atomic i32 0, i32* %addr monotonic, align 4
 ; CHECK-NEXT: ret void
 define void @xchg_unused_monotonic(i32* %addr) {
   atomicrmw xchg i32* %addr, i32 0 monotonic
@@ -192,7 +192,7 @@
 }
 
 ; CHECK-LABEL: xchg_unused_release
-; CHECK-NEXT: atomicrmw xchg i32* %addr, i32 -1 release
+; CHECK-NEXT: store atomic i32 -1, i32* %addr release, align 4
 ; CHECK-NEXT: ret void
 define void @xchg_unused_release(i32* %addr) {
   atomicrmw xchg i32* %addr, i32 -1 release
@@ -215,6 +215,13 @@
   ret void
 }
 
+; CHECK-LABEL: sat_or_allones_unused
+; CHECK-NEXT: store atomic i32 -1, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret void
+define void @sat_or_allones_unused(i32* %addr) {
+  atomicrmw or i32* %addr, i32 -1 monotonic
+  ret void
+}