[funcattrs] Infer nosync from instruction walk

Pretty straightforward use of existing infrastructure and port of the attributor inference rules for nosync.

A couple points of interest:
* I deliberately switched from "monotonic or better" to "unordered or better". This is simply me being conservative and is better in line with the rest of the optimizer. We treat monotonic conservatively pretty much everywhere.
* The operand bundle test change is suspicious. It looks like we might have missed something here, but if so, it's an issue with the existing nofree inference as well. I'm going to take a closer look at that separately.
* I needed to keep the previous inference from readnone. This surprised me, but made sense once I realized readonly inference goes to lengths to reason about local vs non-local memory and that writes to local memory are okay. This is fine for the purpose of nosync, but would e.g. prevent us from inferring nofree from readnone - which is slightly surprising.

Differential Revision: https://reviews.llvm.org/D99769
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index f76dd58..cfd302a 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1476,25 +1476,100 @@
   return Changed;
 }
 
-// Infer the nosync attribute.  For the moment, the inference is trivial
-// and relies on the readnone attribute already being infered.  This will
-// be replaced with a more robust implementation in the near future.
-static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
-  bool Changed = false;
+// Return true if this is an atomic which has an ordering stronger than
+// unordered.  Note that this is different than the predicate we use in
+// Attributor.  Here we chose to be conservative and consider monotonic
+// operations potentially synchronizing.  We generally don't do much with
+// monotonic operations, so this is simply risk reduction.
+static bool isOrderedAtomic(Instruction *I) {
+  if (!I->isAtomic())
+    return false;
 
+  if (auto *FI = dyn_cast<FenceInst>(I))
+    // All legal orderings for fence are stronger than monotonic.
+    return FI->getSyncScopeID() != SyncScope::SingleThread;
+  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
+    return true;
+  else if (auto *SI = dyn_cast<StoreInst>(I))
+    return !SI->isUnordered();
+  else if (auto *LI = dyn_cast<LoadInst>(I))
+    return !LI->isUnordered();
+  else {
+    llvm_unreachable("unknown atomic instruction?");
+  }
+}
+
+static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
+  // Volatile may synchronize
+  if (I.isVolatile())
+    return true;
+
+  // An ordered atomic may synchronize.  (See comment about on monotonic.)
+  if (isOrderedAtomic(&I))
+    return true;
+
+  auto *CB = dyn_cast<CallBase>(&I);
+  if (!CB)
+    // Non call site cases covered by the two checks above
+    return false;
+
+  if (CB->hasFnAttr(Attribute::NoSync))
+    return false;
+
+  // readnone + not convergent implies nosync
+  // (This is needed to initialize inference from declarations which aren't
+  //  explicitly nosync, but are readnone and not convergent.)
+  if (CB->hasFnAttr(Attribute::ReadNone) &&
+      !CB->hasFnAttr(Attribute::Convergent))
+    return false;
+
+  // Non volatile memset/memcpy/memmoves are nosync
+  // NOTE: Only intrinsics with volatile flags should be handled here.  All
+  // others should be marked in Intrinsics.td.
+  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
+    if (!MI->isVolatile())
+      return false;
+
+  // Speculatively assume in SCC.
+  if (Function *Callee = CB->getCalledFunction())
+    if (SCCNodes.contains(Callee))
+      return false;
+
+  return true;
+}
+
+// Infer the nosync attribute.
+static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
+  AttributeInferer AI;
+  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
+      Attribute::NoSync,
+      // Skip already marked functions.
+      [](const Function &F) { return F.hasNoSync(); },
+      // Instructions that break nosync assumption.
+      [&SCCNodes](Instruction &I) {
+        return InstrBreaksNoSync(I, SCCNodes);
+      },
+      [](Function &F) {
+        LLVM_DEBUG(dbgs()
+                   << "Adding nosync attr to fn " << F.getName() << "\n");
+        F.setNoSync();
+        ++NumNoSync;
+      },
+      /* RequiresExactDefinition= */ true});
+  bool Changed = AI.run(SCCNodes);
+
+  // readnone + not convergent implies nosync
+  // (This is here so that we don't have to duplicate the function local
+  //  memory reasoning of the readnone analysis.)
   for (Function *F : SCCNodes) {
     if (!F || F->hasNoSync())
       continue;
-
-    // readnone + not convergent implies nosync
     if (!F->doesNotAccessMemory() || F->isConvergent())
       continue;
-
     F->setNoSync();
     NumNoSync++;
     Changed = true;
   }
-
   return Changed;
 }
 
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index d0f142b..61a4633 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -73,12 +73,12 @@
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind
 
 ; CHECK: attributes #0 = { norecurse nosync nounwind readnone willreturn }
-; CHECK: attributes #1 = { nofree norecurse nounwind willreturn writeonly }
+; CHECK: attributes #1 = { nofree norecurse nosync nounwind willreturn writeonly }
 ; CHECK: attributes #2 = { nounwind readonly }
 ; CHECK: attributes #3 = { nounwind }
 ; CHECK: attributes #4 = { nosync nounwind readnone willreturn }
-; CHECK: attributes #5 = { nofree nounwind willreturn }
-; CHECK: attributes #6 = { nofree norecurse nounwind willreturn }
+; CHECK: attributes #5 = { nofree nosync nounwind willreturn }
+; CHECK: attributes #6 = { nofree norecurse nosync nounwind willreturn }
 ; CHECK: attributes #7 = { argmemonly nofree nosync nounwind willreturn }
 
 ; Root note.
diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
index 540cc2f..acf04a9 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -7,13 +7,13 @@
 ; GCN: %mul.i = fmul float %load, 1.500000e+01
 
 ; UNSAFE: attributes #0 = { norecurse nosync nounwind readnone willreturn "unsafe-fp-math"="true" }
-; UNSAFE: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
+; UNSAFE: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
 
 ; NOINFS: attributes #0 = { norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" }
-; NOINFS: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
+; NOINFS: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
 
 ; NONANS: attributes #0 = { norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" }
-; NONANS: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
+; NONANS: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
 
 define float @foo(float %x) #0 {
 entry:
diff --git a/llvm/test/Transforms/FunctionAttrs/nofree.ll b/llvm/test/Transforms/FunctionAttrs/nofree.ll
index 028c115..a51f846 100644
--- a/llvm/test/Transforms/FunctionAttrs/nofree.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nofree.ll
@@ -36,7 +36,7 @@
 declare void @free(i8* nocapture) local_unnamed_addr #2
 
 define i32 @_Z4foo3Pi(i32* nocapture readonly %a) local_unnamed_addr #3 {
-; CHECK: Function Attrs: norecurse nounwind readonly uwtable willreturn
+; CHECK: Function Attrs: norecurse nosync nounwind readonly uwtable willreturn
 ; CHECK-LABEL: @_Z4foo3Pi(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll
index fc532b7..aed1f366 100644
--- a/llvm/test/Transforms/FunctionAttrs/nosync.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll
@@ -93,7 +93,7 @@
 
 ; singlethread fences are okay
 define void @test9(i8* %p) {
-; CHECK: Function Attrs: nofree norecurse nounwind willreturn
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT:    fence syncscope("singlethread") seq_cst
 ; CHECK-NEXT:    ret void
@@ -137,7 +137,7 @@
 }
 
 define i32 @load_unordered(i32* nocapture readonly %0) norecurse nounwind uwtable {
-; CHECK: Function Attrs: norecurse nounwind readonly uwtable willreturn
+; CHECK: Function Attrs: norecurse nosync nounwind readonly uwtable willreturn
 ; CHECK-LABEL: @load_unordered(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load atomic i32, i32* [[TMP0:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret i32 [[TMP2]]
@@ -148,7 +148,7 @@
 
 ; atomic store with unordered ordering.
 define void @store_unordered(i32* nocapture %0) norecurse nounwind uwtable {
-; CHECK: Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+; CHECK: Function Attrs: nofree norecurse nosync nounwind uwtable willreturn writeonly
 ; CHECK-LABEL: @store_unordered(
 ; CHECK-NEXT:    store atomic i32 10, i32* [[TMP0:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret void
@@ -209,9 +209,9 @@
 declare void @nosync_function() noinline nounwind uwtable nosync
 
 define void @call_nosync_function() nounwind uwtable noinline {
-; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK: Function Attrs: noinline nosync nounwind uwtable
 ; CHECK-LABEL: @call_nosync_function(
-; CHECK-NEXT:    tail call void @nosync_function() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT:    tail call void @nosync_function() #[[ATTR8:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   tail call void @nosync_function() noinline nounwind uwtable
@@ -225,7 +225,7 @@
 define void @call_might_sync() nounwind uwtable noinline {
 ; CHECK: Function Attrs: noinline nounwind uwtable
 ; CHECK-LABEL: @call_might_sync(
-; CHECK-NEXT:    tail call void @might_sync() #[[ATTR7]]
+; CHECK-NEXT:    tail call void @might_sync() #[[ATTR8]]
 ; CHECK-NEXT:    ret void
 ;
   tail call void @might_sync() noinline nounwind uwtable
@@ -248,7 +248,7 @@
 
 ; positive, non-volatile intrinsic.
 define i32 @memset_non_volatile(i8* %ptr1, i8 %val) {
-; CHECK: Function Attrs: nofree nounwind willreturn writeonly
+; CHECK: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; CHECK-LABEL: @memset_non_volatile(
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* [[PTR1:%.*]], i8 [[VAL:%.*]], i32 8, i1 false)
 ; CHECK-NEXT:    ret i32 4
diff --git a/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll b/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
index 78cfd03..92384a6 100644
--- a/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
+++ b/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
@@ -14,4 +14,4 @@
 }
 
 
-; CHECK: attributes #0 = { nofree nounwind }
+; CHECK: attributes #0 = { nofree nosync nounwind }
diff --git a/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll b/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
index 968a290..578fc41 100644
--- a/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
+++ b/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
@@ -17,4 +17,4 @@
   ret void
 }
 
-; CHECK: attributes #0 = { nofree nounwind }
+; CHECK: attributes #0 = { nofree nosync nounwind }
diff --git a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
index 1afd97e..6b50b48 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
@@ -52,5 +52,5 @@
 !28 = !DILocation(line: 9, column: 18, scope: !2)
 !29 = !DILocation(line: 10, column: 1, scope: !2)
 
-; CHECK: attributes #0 = { nofree norecurse nounwind willreturn }
+; CHECK: attributes #0 = { nofree norecurse nosync nounwind willreturn }
 ; CHECK-NOT: foo.coefficient1