[RISCV] Codegen for i8, i16, and i32 atomicrmw with RV32A

Introduce a new RISCVExpandPseudoInsts pass to expand atomic 
pseudo-instructions after register allocation. This is necessary in order to 
ensure that register spills aren't introduced between LL and SC, thus breaking 
the forward progress guarantee for the operation. AArch64 does something 
similar for CmpXchg (though only at O0), and Mips is moving towards this 
approach (see D31287). See also [this mailing list 
post](http://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html) from 
James Knight, which summarises the issues with lowering to ll/sc in IR or 
pre-RA.

See the [accompanying RFC 
thread](http://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html) for an 
overview of the lowering strategy.

Differential Revision: https://reviews.llvm.org/D47882


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342534 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index 1760302..37d0572 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -163,6 +163,7 @@
     LLOnly,  // Expand the (load) instruction into just a load-linked, which has
              // greater atomic guarantees than a normal load.
     CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
+    MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
   };
 
   /// Enum that specifies when a multiplication should be expanded.
@@ -1562,6 +1563,17 @@
     llvm_unreachable("Store conditional unimplemented on this target");
   }
 
+  /// Perform a masked atomicrmw using a target-specific intrinsic. This
+  /// represents the core LL/SC loop which will be lowered at a late stage by
+  /// the backend.
+  virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
+                                              AtomicRMWInst *AI,
+                                              Value *AlignedAddr, Value *Incr,
+                                              Value *Mask, Value *ShiftAmt,
+                                              AtomicOrdering Ord) const {
+    llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
+  }
+
   /// Inserts in the IR a target-specific intrinsic specifying a fence.
   /// It is called by AtomicExpandPass before expanding an
   ///   AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 0cec754..b405e86 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -1008,3 +1008,4 @@
 include "llvm/IR/IntrinsicsBPF.td"
 include "llvm/IR/IntrinsicsSystemZ.td"
 include "llvm/IR/IntrinsicsWebAssembly.td"
+include "llvm/IR/IntrinsicsRISCV.td"
diff --git a/include/llvm/IR/IntrinsicsRISCV.td b/include/llvm/IR/IntrinsicsRISCV.td
new file mode 100644
index 0000000..b656622
--- /dev/null
+++ b/include/llvm/IR/IntrinsicsRISCV.td
@@ -0,0 +1,39 @@
+//===- IntrinsicsRISCV.td - Defines RISCV intrinsics -------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the RISCV-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "riscv" in {
+
+//===----------------------------------------------------------------------===//
+// Atomics
+
+class MaskedAtomicRMW32Intrinsic
+    : Intrinsic<[llvm_i32_ty],
+                [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                [IntrArgMemOnly, NoCapture<0>]>;
+
+class MaskedAtomicRMW32WithSextIntrinsic
+    : Intrinsic<[llvm_i32_ty],
+                [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                 llvm_i32_ty],
+                [IntrArgMemOnly, NoCapture<0>]>;
+
+def int_riscv_masked_atomicrmw_xchg_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_add_i32  : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_sub_i32  : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_nand_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_max_i32  : MaskedAtomicRMW32WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_min_i32  : MaskedAtomicRMW32WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_umax_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_umin_i32 : MaskedAtomicRMW32Intrinsic;
+
+} // TargetPrefix = "riscv"
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index b55afed..3a283ca 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -90,6 +90,7 @@
         TargetLoweringBase::AtomicExpansionKind ExpansionKind);
     AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
     void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+    void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
 
     AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
     static Value *insertRMWCmpXchgLoop(
@@ -411,8 +412,9 @@
     return expandAtomicLoadToLL(LI);
   case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
     return expandAtomicLoadToCmpXchg(LI);
+  default:
+    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
   }
-  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
 }
 
 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -574,6 +576,10 @@
     }
     return true;
   }
+  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
+    expandAtomicRMWToMaskedIntrinsic(AI);
+    return true;
+  }
   default:
     llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
   }
@@ -662,6 +668,9 @@
                                     IRBuilder<> &Builder, Value *Loaded,
                                     Value *Shifted_Inc, Value *Inc,
                                     const PartwordMaskValues &PMV) {
+  // TODO: update to use
+  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
+  // to merge bits from two values without requiring PMV.Inv_Mask.
   switch (Op) {
   case AtomicRMWInst::Xchg: {
     Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
@@ -914,6 +923,33 @@
   I->eraseFromParent();
 }
 
+void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
+  IRBuilder<> Builder(AI);
+
+  PartwordMaskValues PMV =
+      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+                       TLI->getMinCmpXchgSizeInBits() / 8);
+
+  // The value operand must be sign-extended for signed min/max so that the
+  // target's signed comparison instructions can be used. Otherwise, just
+  // zero-extend.
+  Instruction::CastOps CastOp = Instruction::ZExt;
+  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
+  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
+    CastOp = Instruction::SExt;
+
+  Value *ValOperand_Shifted = Builder.CreateShl(
+      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
+      PMV.ShiftAmt, "ValOperand_Shifted");
+  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
+      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
+      AI->getOrdering());
+  Value *FinalOldResult = Builder.CreateTrunc(
+      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+  AI->replaceAllUsesWith(FinalOldResult);
+  AI->eraseFromParent();
+}
+
 Value *AtomicExpand::insertRMWLLSCLoop(
     IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
     AtomicOrdering MemOpOrder,
diff --git a/lib/Target/RISCV/CMakeLists.txt b/lib/Target/RISCV/CMakeLists.txt
index f8d4e2b..ee5ed62 100644
--- a/lib/Target/RISCV/CMakeLists.txt
+++ b/lib/Target/RISCV/CMakeLists.txt
@@ -15,6 +15,7 @@
 
 add_llvm_target(RISCVCodeGen
   RISCVAsmPrinter.cpp
+  RISCVExpandPseudoInsts.cpp
   RISCVFrameLowering.cpp
   RISCVInstrInfo.cpp
   RISCVISelDAGToDAG.cpp
diff --git a/lib/Target/RISCV/RISCV.h b/lib/Target/RISCV/RISCV.h
index 2e4f536..b48a68f 100644
--- a/lib/Target/RISCV/RISCV.h
+++ b/lib/Target/RISCV/RISCV.h
@@ -16,6 +16,7 @@
 #define LLVM_LIB_TARGET_RISCV_RISCV_H
 
 #include "MCTargetDesc/RISCVBaseInfo.h"
+#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 class RISCVTargetMachine;
@@ -36,6 +37,9 @@
 
 FunctionPass *createRISCVMergeBaseOffsetOptPass();
 void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
+
+FunctionPass *createRISCVExpandPseudoPass();
+void initializeRISCVExpandPseudoPass(PassRegistry &);
 }
 
 #endif
diff --git a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
new file mode 100644
index 0000000..1c23680
--- /dev/null
+++ b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -0,0 +1,452 @@
+//===-- RISCVExpandPseudoInsts.cpp - Expand pseudo instructions -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_EXPAND_PSEUDO_NAME "RISCV pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVExpandPseudo : public MachineFunctionPass {
+public:
+  const RISCVInstrInfo *TII;
+  static char ID;
+
+  RISCVExpandPseudo() : MachineFunctionPass(ID) {
+    initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override { return RISCV_EXPAND_PSEUDO_NAME; }
+
+private:
+  bool expandMBB(MachineBasicBlock &MBB);
+  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+  bool expandAtomicBinOp(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
+                         bool IsMasked, int Width,
+                         MachineBasicBlock::iterator &NextMBBI);
+  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
+                            MachineBasicBlock::iterator &NextMBBI);
+};
+
+char RISCVExpandPseudo::ID = 0;
+
+bool RISCVExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= expandMBB(MBB);
+  return Modified;
+}
+
+bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= expandMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MBBI,
+                                 MachineBasicBlock::iterator &NextMBBI) {
+  switch (MBBI->getOpcode()) {
+  case RISCV::PseudoAtomicLoadNand32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoMaskedAtomicSwap32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
+                             NextMBBI);
+  case RISCV::PseudoMaskedAtomicLoadAdd32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
+  case RISCV::PseudoMaskedAtomicLoadSub32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
+  case RISCV::PseudoMaskedAtomicLoadNand32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
+                             NextMBBI);
+  case RISCV::PseudoMaskedAtomicLoadMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+                                NextMBBI);
+  case RISCV::PseudoMaskedAtomicLoadMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+                                NextMBBI);
+  case RISCV::PseudoMaskedAtomicLoadUMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+                                NextMBBI);
+  case RISCV::PseudoMaskedAtomicLoadUMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+                                NextMBBI);
+  }
+
+  return false;
+}
+
+static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return RISCV::LR_W;
+  case AtomicOrdering::Acquire:
+    return RISCV::LR_W_AQ;
+  case AtomicOrdering::Release:
+    return RISCV::LR_W;
+  case AtomicOrdering::AcquireRelease:
+    return RISCV::LR_W_AQ;
+  case AtomicOrdering::SequentiallyConsistent:
+    return RISCV::LR_W_AQ_RL;
+  }
+}
+
+static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return RISCV::SC_W;
+  case AtomicOrdering::Acquire:
+    return RISCV::SC_W;
+  case AtomicOrdering::Release:
+    return RISCV::SC_W_RL;
+  case AtomicOrdering::AcquireRelease:
+    return RISCV::SC_W_RL;
+  case AtomicOrdering::SequentiallyConsistent:
+    return RISCV::SC_W_AQ_RL;
+  }
+}
+
+static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
+                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
+                                   MachineBasicBlock *LoopMBB,
+                                   MachineBasicBlock *DoneMBB,
+                                   AtomicRMWInst::BinOp BinOp, int Width) {
+  assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned ScratchReg = MI.getOperand(1).getReg();
+  unsigned AddrReg = MI.getOperand(2).getReg();
+  unsigned IncrReg = MI.getOperand(3).getReg();
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+
+  // .loop:
+  //   lr.w dest, (addr)
+  //   binop scratch, dest, val
+  //   sc.w scratch, scratch, (addr)
+  //   bnez scratch, loop
+  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+      .addReg(AddrReg);
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Nand:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+        .addReg(ScratchReg)
+        .addImm(-1);
+    break;
+  }
+  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+      .addReg(AddrReg)
+      .addReg(ScratchReg);
+  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+      .addReg(ScratchReg)
+      .addReg(RISCV::X0)
+      .addMBB(LoopMBB);
+}
+
+static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
+                              MachineBasicBlock *MBB, unsigned DestReg,
+                              unsigned OldValReg, unsigned NewValReg,
+                              unsigned MaskReg, unsigned ScratchReg) {
+  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
+  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
+  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
+
+  // We select bits from newval and oldval using:
+  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
+  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
+      .addReg(OldValReg)
+      .addReg(NewValReg);
+  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
+      .addReg(ScratchReg)
+      .addReg(MaskReg);
+  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
+      .addReg(OldValReg)
+      .addReg(ScratchReg);
+}
+
+static void doMaskedAtomicBinOpExpansion(
+    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
+    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+  assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned ScratchReg = MI.getOperand(1).getReg();
+  unsigned AddrReg = MI.getOperand(2).getReg();
+  unsigned IncrReg = MI.getOperand(3).getReg();
+  unsigned MaskReg = MI.getOperand(4).getReg();
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
+
+  // .loop:
+  //   lr.w destreg, (alignedaddr)
+  //   binop scratch, destreg, incr
+  //   xor scratch, destreg, scratch
+  //   and scratch, scratch, masktargetdata
+  //   xor scratch, destreg, scratch
+  //   sc.w scratch, scratch, (alignedaddr)
+  //   bnez scratch, loop
+  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+      .addReg(AddrReg);
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Xchg:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+        .addReg(RISCV::X0)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Add:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Sub:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Nand:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+        .addReg(ScratchReg)
+        .addImm(-1);
+    break;
+  }
+
+  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
+                    ScratchReg);
+
+  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+      .addReg(AddrReg)
+      .addReg(ScratchReg);
+  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+      .addReg(ScratchReg)
+      .addReg(RISCV::X0)
+      .addMBB(LoopMBB);
+}
+
+bool RISCVExpandPseudo::expandAtomicBinOp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopMBB);
+  MF->insert(++LoopMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopMBB);
+
+  if (!IsMasked)
+    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+  else
+    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
+                                 Width);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *LoopMBB);
+  computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+  return true;
+}
+
+static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
+                       MachineBasicBlock *MBB, unsigned ValReg,
+                       unsigned ShamtReg) {
+  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
+      .addReg(ValReg)
+      .addReg(ShamtReg);
+  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
+      .addReg(ValReg)
+      .addReg(ShamtReg);
+}
+
+bool RISCVExpandPseudo::expandAtomicMinMaxOp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+    MachineBasicBlock::iterator &NextMBBI) {
+  assert(IsMasked == true &&
+         "Should only need to expand masked atomic max/min");
+  assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopHeadMBB);
+  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
+  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+  LoopHeadMBB->addSuccessor(LoopTailMBB);
+  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+  LoopTailMBB->addSuccessor(LoopHeadMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopHeadMBB);
+
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned Scratch1Reg = MI.getOperand(1).getReg();
+  unsigned Scratch2Reg = MI.getOperand(2).getReg();
+  unsigned AddrReg = MI.getOperand(3).getReg();
+  unsigned IncrReg = MI.getOperand(4).getReg();
+  unsigned MaskReg = MI.getOperand(5).getReg();
+  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
+
+  //
+  // .loophead:
+  //   lr.w destreg, (alignedaddr)
+  //   and scratch2, destreg, mask
+  //   mv scratch1, destreg
+  //   [sext scratch2 if signed min/max]
+  //   ifnochangeneeded scratch2, incr, .looptail
+  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+      .addReg(AddrReg);
+  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
+      .addReg(DestReg)
+      .addReg(MaskReg);
+  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
+      .addReg(DestReg)
+      .addImm(0);
+
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Max: {
+    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+        .addReg(Scratch2Reg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  }
+  case AtomicRMWInst::Min: {
+    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+        .addReg(IncrReg)
+        .addReg(Scratch2Reg)
+        .addMBB(LoopTailMBB);
+    break;
+  }
+  case AtomicRMWInst::UMax:
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+        .addReg(Scratch2Reg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  case AtomicRMWInst::UMin:
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+        .addReg(IncrReg)
+        .addReg(Scratch2Reg)
+        .addMBB(LoopTailMBB);
+    break;
+  }
+
+  // .loopifbody:
+  //   xor scratch1, destreg, incr
+  //   and scratch1, scratch1, mask
+  //   xor scratch1, destreg, scratch1
+  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+                    MaskReg, Scratch1Reg);
+
+  // .looptail:
+  //   sc.w scratch1, scratch1, (addr)
+  //   bnez scratch1, .loophead
+  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+      .addReg(AddrReg)
+      .addReg(Scratch1Reg);
+  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+      .addReg(Scratch1Reg)
+      .addReg(RISCV::X0)
+      .addMBB(LoopHeadMBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+  computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+  return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
+                RISCV_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createRISCVExpandPseudoPass() { return new RISCVExpandPseudo(); }
+
+} // end of namespace llvm
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 6f2e6b4..44da49c 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -137,10 +137,12 @@
   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
 
-  if (Subtarget.hasStdExtA())
+  if (Subtarget.hasStdExtA()) {
     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
-  else
+    setMinCmpXchgSizeInBits(32);
+  } else {
     setMaxAtomicSizeInBitsSupported(0);
+  }
 
   setBooleanContents(ZeroOrOneBooleanContent);
 
@@ -160,6 +162,33 @@
   return VT.changeVectorElementTypeToInteger();
 }
 
+bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                             const CallInst &I,
+                                             MachineFunction &MF,
+                                             unsigned Intrinsic) const {
+  switch (Intrinsic) {
+  default:
+    return false;
+  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
+  case Intrinsic::riscv_masked_atomicrmw_add_i32:
+  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
+  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
+  case Intrinsic::riscv_masked_atomicrmw_max_i32:
+  case Intrinsic::riscv_masked_atomicrmw_min_i32:
+  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
+  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
+    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.align = 4;
+    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+                 MachineMemOperand::MOVolatile;
+    return true;
+  }
+}
+
 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
                                                 unsigned AS,
@@ -1596,3 +1625,63 @@
     return Builder.CreateFence(AtomicOrdering::Acquire);
   return nullptr;
 }
+
+TargetLowering::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+  if (Size == 8 || Size == 16)
+    return AtomicExpansionKind::MaskedIntrinsic;
+  return AtomicExpansionKind::None;
+}
+
+static Intrinsic::ID
+getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Xchg:
+    return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
+  case AtomicRMWInst::Add:
+    return Intrinsic::riscv_masked_atomicrmw_add_i32;
+  case AtomicRMWInst::Sub:
+    return Intrinsic::riscv_masked_atomicrmw_sub_i32;
+  case AtomicRMWInst::Nand:
+    return Intrinsic::riscv_masked_atomicrmw_nand_i32;
+  case AtomicRMWInst::Max:
+    return Intrinsic::riscv_masked_atomicrmw_max_i32;
+  case AtomicRMWInst::Min:
+    return Intrinsic::riscv_masked_atomicrmw_min_i32;
+  case AtomicRMWInst::UMax:
+    return Intrinsic::riscv_masked_atomicrmw_umax_i32;
+  case AtomicRMWInst::UMin:
+    return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+  }
+}
+
+Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
+    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+  Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
+  Type *Tys[] = {AlignedAddr->getType()};
+  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
+      AI->getModule(),
+      getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
+
+  // Must pass the shift amount needed to sign extend the loaded value prior
+  // to performing a signed comparison for min/max. ShiftAmt is the number of
+  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
+  // is the number of bits to left+right shift the value in order to
+  // sign-extend.
+  if (AI->getOperation() == AtomicRMWInst::Min ||
+      AI->getOperation() == AtomicRMWInst::Max) {
+    const DataLayout &DL = AI->getModule()->getDataLayout();
+    unsigned ValWidth =
+        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
+    Value *SextShamt = Builder.CreateSub(
+        Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
+    return Builder.CreateCall(LrwOpScwLoop,
+                              {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+  }
+
+  return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+}
diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h
index 3e3e67b..e219511 100644
--- a/lib/Target/RISCV/RISCVISelLowering.h
+++ b/lib/Target/RISCV/RISCVISelLowering.h
@@ -43,6 +43,9 @@
   explicit RISCVTargetLowering(const TargetMachine &TM,
                                const RISCVSubtarget &STI);
 
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+                          MachineFunction &MF,
+                          unsigned Intrinsic) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                              unsigned AS,
                              Instruction *I = nullptr) const override;
@@ -115,6 +118,12 @@
   bool IsEligibleForTailCallOptimization(CCState &CCInfo,
     CallLoweringInfo &CLI, MachineFunction &MF,
     const SmallVector<CCValAssign, 16> &ArgLocs) const;
+
+  TargetLowering::AtomicExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+  virtual Value *emitMaskedAtomicRMWIntrinsic(
+      IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+      Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override;
 };
 }
 
diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td
index 720dd78..dd739f0 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/lib/Target/RISCV/RISCVInstrInfo.td
@@ -603,7 +603,7 @@
 
 /// Generic pattern classes
 
-class PatGprGpr<SDPatternOperator OpNode, RVInstR Inst>
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst>
     : Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
 class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
     : Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>;
diff --git a/lib/Target/RISCV/RISCVInstrInfoA.td b/lib/Target/RISCV/RISCVInstrInfoA.td
index ef46892..bf8c835 100644
--- a/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -105,4 +105,129 @@
 defm : AtomicStPat<atomic_store_8,  SB, GPR>;
 defm : AtomicStPat<atomic_store_16, SH, GPR>;
 defm : AtomicStPat<atomic_store_32, SW, GPR>;
+
+/// AMOs
+
+multiclass AMOPat<string AtomicOp, string BaseInst> { // one Pat per ordering
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
+                  !cast<RVInst>(BaseInst)>;            // plain form, no suffix
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
+                  !cast<RVInst>(BaseInst#"_AQ")>;
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
+                  !cast<RVInst>(BaseInst#"_RL")>;
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+                  !cast<RVInst>(BaseInst#"_AQ_RL")>;
+  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+                  !cast<RVInst>(BaseInst#"_AQ_RL")>;   // same as acq_rel
+}
+
+defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
+defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;
+defm : AMOPat<"atomic_load_and_32", "AMOAND_W">;
+defm : AMOPat<"atomic_load_or_32", "AMOOR_W">;
+defm : AMOPat<"atomic_load_xor_32", "AMOXOR_W">;
+defm : AMOPat<"atomic_load_max_32", "AMOMAX_W">;
+defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
+defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
+defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
+
+def : Pat<(atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr),
+          (AMOADD_W GPR:$addr, (SUB X0, GPR:$incr))>; // amoadd of (X0 - incr)
+def : Pat<(atomic_load_sub_32_acquire GPR:$addr, GPR:$incr),
+          (AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_release GPR:$addr, GPR:$incr),
+          (AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr),
+          (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr),
+          (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; // same as acq_rel
+
+/// Pseudo AMOs
+
+class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
+                         (ins GPR:$addr, GPR:$incr, i32imm:$ordering), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+  let mayLoad = 1;        // The pseudo is modelled as both reading ...
+  let mayStore = 1;       // ... and writing memory.
+  let hasSideEffects = 0; // No side effects beyond that load/store.
+}
+
+def PseudoAtomicLoadNand32 : PseudoAMO;
+// The trailing immediate in each pattern is the numeric AtomicOrdering value
+// (see AtomicOrdering.h) and must be kept in sync with that enum.
+def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(atomic_load_nand_32_acquire GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(atomic_load_nand_32_release GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
+
+class PseudoMaskedAMO // PseudoAMO's operands plus a GPR $mask input
+    : Pseudo<(outs GPR:$res, GPR:$scratch),
+             (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$ordering), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOMinMax // for masked max/min; adds $sextshamt
+    : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+             (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$sextshamt,
+              i32imm:$ordering), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+                    "@earlyclobber $scratch2";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOUMinUMax // for masked umax/umin; two scratch outputs
+    : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+             (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$ordering), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+                    "@earlyclobber $scratch2";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst> // 1:1 operand map
+    : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
+          (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
+
+class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
+    : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+           imm:$ordering), // NOTE(review): $shiftamt is a GPR here, while
+          (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+           imm:$ordering)>; // the MinMax pseudo declares i32imm; confirm.
+
+def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
+                         PseudoMaskedAtomicSwap32>;
+def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
+                         PseudoMaskedAtomicLoadAdd32>;
+def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
+                         PseudoMaskedAtomicLoadSub32>;
+def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
+                         PseudoMaskedAtomicLoadNand32>;
+def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
+                               PseudoMaskedAtomicLoadMax32>;
+def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
+                               PseudoMaskedAtomicLoadMin32>;
+def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
+                         PseudoMaskedAtomicLoadUMax32>;
+def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
+                         PseudoMaskedAtomicLoadUMin32>;
 } // Predicates = [HasStdExtA]
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index a2ebf5b..e75da76 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,6 +27,8 @@
 extern "C" void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+  auto *PR = PassRegistry::getPassRegistry();
+  initializeRISCVExpandPseudoPass(*PR); // register the pseudo-expansion pass
 }
 
 static std::string computeDataLayout(const Triple &TT) {
@@ -78,6 +80,7 @@
   void addIRPasses() override;
   bool addInstSelector() override;
   void addPreEmitPass() override;
+  void addPreEmitPass2() override;
   void addPreRegAlloc() override;
 };
 }
@@ -99,6 +102,13 @@
 
 void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
 
+void RISCVPassConfig::addPreEmitPass2() {
+  // Expand the atomic pseudo-instructions at the last possible moment, so that
+  // no other pass gets the chance to break the requirements for forward
+  // progress in the LR/SC block.
+  addPass(createRISCVExpandPseudoPass());
+}
+
 void RISCVPassConfig::addPreRegAlloc() {
   addPass(createRISCVMergeBaseOffsetOptPass());
 }
diff --git a/test/CodeGen/RISCV/atomic-rmw.ll b/test/CodeGen/RISCV/atomic-rmw.ll
index 9e1e268..f27afbb 100644
--- a/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/test/CodeGen/RISCV/atomic-rmw.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32IA %s
 
 define i8 @atomicrmw_xchg_i8_monotonic(i8* %a, i8 %b) {
 ; RV32I-LABEL: atomicrmw_xchg_i8_monotonic:
@@ -12,6 +14,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB0_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -26,6 +49,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB1_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -40,6 +84,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB2_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i8* %a, i8 %b release
   ret i8 %1
 }
@@ -54,6 +119,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB3_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -68,6 +154,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB4_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -82,6 +189,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB5_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -96,6 +224,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB6_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -110,6 +259,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB7_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB7_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i8* %a, i8 %b release
   ret i8 %1
 }
@@ -124,6 +294,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB8_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -138,6 +329,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB9_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -152,6 +364,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB10_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -166,6 +399,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB11_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -180,6 +434,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB12_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i8* %a, i8 %b release
   ret i8 %1
 }
@@ -194,6 +469,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB13_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -208,6 +504,27 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB14_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -222,6 +539,21 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    or a1, a3, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -236,6 +568,21 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    or a1, a3, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -250,6 +597,21 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    or a1, a3, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i8* %a, i8 %b release
   ret i8 %1
 }
@@ -264,6 +626,21 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    or a1, a3, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -278,6 +655,21 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    or a1, a3, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -292,6 +684,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB20_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -306,6 +720,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB21_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -320,6 +756,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB22_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i8* %a, i8 %b release
   ret i8 %1
 }
@@ -334,6 +792,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB23_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -348,6 +828,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a3, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a3
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB24_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -362,6 +864,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -376,6 +889,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -390,6 +914,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i8* %a, i8 %b release
   ret i8 %1
 }
@@ -404,6 +939,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -418,6 +964,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -432,6 +989,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -446,6 +1014,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -460,6 +1039,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i8* %a, i8 %b release
   ret i8 %1
 }
@@ -474,6 +1064,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -488,6 +1089,17 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -535,6 +1147,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB35_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB35_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB35_3: # in Loop: Header=BB35_1 Depth=1
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB35_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -585,6 +1227,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB36_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB36_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB36_3: # in Loop: Header=BB36_1 Depth=1
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB36_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -635,6 +1307,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB37_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB37_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB37_3: # in Loop: Header=BB37_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB37_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i8* %a, i8 %b release
   ret i8 %1
 }
@@ -688,6 +1390,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB38_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB38_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB38_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB38_3: # in Loop: Header=BB38_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB38_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -738,6 +1470,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB39_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB39_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB39_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB39_3: # in Loop: Header=BB39_1 Depth=1
+; RV32IA-NEXT:    sc.w.aqrl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB39_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -785,6 +1547,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB40_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB40_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB40_3: # in Loop: Header=BB40_1 Depth=1
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB40_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -835,6 +1627,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB41_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB41_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB41_3: # in Loop: Header=BB41_1 Depth=1
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB41_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -885,6 +1707,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB42_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB42_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB42_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB42_3: # in Loop: Header=BB42_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB42_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i8* %a, i8 %b release
   ret i8 %1
 }
@@ -938,6 +1790,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB43_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB43_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB43_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB43_3: # in Loop: Header=BB43_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB43_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -988,6 +1870,36 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 24
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    addi a4, zero, 255
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 24
+; RV32IA-NEXT:    srai a1, a1, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB44_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB44_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB44_3: # in Loop: Header=BB44_1 Depth=1
+; RV32IA-NEXT:    sc.w.aqrl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB44_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -1033,6 +1945,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a3, a1, .LBB45_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB45_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -1081,6 +2018,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a3, a1, .LBB46_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB46_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -1129,6 +2091,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a3, a1, .LBB47_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB47_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i8* %a, i8 %b release
   ret i8 %1
 }
@@ -1180,6 +2167,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a3, a1, .LBB48_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB48_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -1228,6 +2240,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a3, a1, .LBB49_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB49_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -1273,6 +2310,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a3, .LBB50_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB50_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i8* %a, i8 %b monotonic
   ret i8 %1
 }
@@ -1321,6 +2383,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a3, .LBB51_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB51_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i8* %a, i8 %b acquire
   ret i8 %1
 }
@@ -1369,6 +2456,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i8_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a3, .LBB52_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB52_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i8* %a, i8 %b release
   ret i8 %1
 }
@@ -1420,6 +2532,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a3, .LBB53_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB53_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i8* %a, i8 %b acq_rel
   ret i8 %1
 }
@@ -1468,6 +2605,31 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 255
+; RV32IA-NEXT:    sll a6, a3, a2
+; RV32IA-NEXT:    andi a1, a1, 255
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    and a3, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a3, .LBB54_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB54_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i8* %a, i8 %b seq_cst
   ret i8 %1
 }
@@ -1482,6 +2644,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB55_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -1496,6 +2680,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB56_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -1510,6 +2716,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB57_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i16* %a, i16 %b release
   ret i16 %1
 }
@@ -1524,6 +2752,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB58_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -1538,6 +2788,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    add a5, zero, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB59_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -1552,6 +2824,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB60_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -1566,6 +2860,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB61_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -1580,6 +2896,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB62_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i16* %a, i16 %b release
   ret i16 %1
 }
@@ -1594,6 +2932,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB63_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -1608,6 +2968,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    add a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB64_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -1622,6 +3004,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB65_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -1636,6 +3040,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB66_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -1650,6 +3076,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB67_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i16* %a, i16 %b release
   ret i16 %1
 }
@@ -1664,6 +3112,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB68_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -1678,6 +3148,28 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    sub a5, a4, a1
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB69_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -1692,6 +3184,22 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    not a2, a2
+; RV32IA-NEXT:    or a1, a2, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -1706,6 +3214,22 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    not a2, a2
+; RV32IA-NEXT:    or a1, a2, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -1720,6 +3244,22 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    not a2, a2
+; RV32IA-NEXT:    or a1, a2, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i16* %a, i16 %b release
   ret i16 %1
 }
@@ -1734,6 +3274,22 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    not a2, a2
+; RV32IA-NEXT:    or a1, a2, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -1748,6 +3304,22 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    not a2, a2
+; RV32IA-NEXT:    or a1, a2, a1
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoand.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -1762,6 +3334,29 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB75_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB75_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -1776,6 +3371,29 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB76_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB76_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -1790,6 +3408,29 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB77_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB77_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i16* %a, i16 %b release
   ret i16 %1
 }
@@ -1804,6 +3445,29 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB78_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB78_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -1818,6 +3482,29 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a2, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB79_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    and a5, a4, a1
+; RV32IA-NEXT:    not a5, a5
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    and a5, a5, a2
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB79_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -1832,6 +3519,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -1846,6 +3546,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -1860,6 +3573,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i16* %a, i16 %b release
   ret i16 %1
 }
@@ -1874,6 +3600,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -1888,6 +3627,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -1902,6 +3654,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -1916,6 +3681,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -1930,6 +3708,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i16* %a, i16 %b release
   ret i16 %1
 }
@@ -1944,6 +3735,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -1958,6 +3762,19 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    srl a0, a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -2005,6 +3822,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB90_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB90_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB90_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB90_3: # in Loop: Header=BB90_1 Depth=1
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB90_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -2055,6 +3903,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB91_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB91_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB91_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB91_3: # in Loop: Header=BB91_1 Depth=1
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB91_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -2105,6 +3984,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB92_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB92_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB92_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB92_3: # in Loop: Header=BB92_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB92_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i16* %a, i16 %b release
   ret i16 %1
 }
@@ -2158,6 +4068,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB93_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB93_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB93_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB93_3: # in Loop: Header=BB93_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB93_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -2208,6 +4149,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB94_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a4, a1, .LBB94_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB94_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB94_3: # in Loop: Header=BB94_1 Depth=1
+; RV32IA-NEXT:    sc.w.aqrl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB94_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -2255,6 +4227,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB95_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB95_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB95_3: # in Loop: Header=BB95_1 Depth=1
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB95_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -2305,6 +4308,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB96_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB96_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB96_3: # in Loop: Header=BB96_1 Depth=1
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB96_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -2355,6 +4389,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB97_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB97_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB97_3: # in Loop: Header=BB97_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB97_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i16* %a, i16 %b release
   ret i16 %1
 }
@@ -2408,6 +4473,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB98_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB98_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB98_3: # in Loop: Header=BB98_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB98_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -2458,6 +4554,37 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    slli a2, a0, 3
+; RV32IA-NEXT:    andi a2, a2, 24
+; RV32IA-NEXT:    addi a3, zero, 16
+; RV32IA-NEXT:    sub a6, a3, a2
+; RV32IA-NEXT:    lui a4, 16
+; RV32IA-NEXT:    addi a4, a4, -1
+; RV32IA-NEXT:    sll a7, a4, a2
+; RV32IA-NEXT:    slli a1, a1, 16
+; RV32IA-NEXT:    srai a1, a1, 16
+; RV32IA-NEXT:    sll a1, a1, a2
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a5, (a0)
+; RV32IA-NEXT:    and a4, a5, a7
+; RV32IA-NEXT:    mv a3, a5
+; RV32IA-NEXT:    sll a4, a4, a6
+; RV32IA-NEXT:    sra a4, a4, a6
+; RV32IA-NEXT:    bge a1, a4, .LBB99_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB99_1 Depth=1
+; RV32IA-NEXT:    xor a3, a5, a1
+; RV32IA-NEXT:    and a3, a3, a7
+; RV32IA-NEXT:    xor a3, a5, a3
+; RV32IA-NEXT:  .LBB99_3: # in Loop: Header=BB99_1 Depth=1
+; RV32IA-NEXT:    sc.w.aqrl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB99_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a5, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -2507,6 +4634,32 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB100_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a2, a1, .LBB100_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB100_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB100_3: # in Loop: Header=BB100_1 Depth=1
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB100_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -2559,6 +4712,32 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB101_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a2, a1, .LBB101_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB101_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB101_3: # in Loop: Header=BB101_1 Depth=1
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB101_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -2611,6 +4790,32 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB102_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a2, a1, .LBB102_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB102_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB102_3: # in Loop: Header=BB102_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB102_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i16* %a, i16 %b release
   ret i16 %1
 }
@@ -2666,6 +4871,32 @@
 ; RV32I-NEXT:    lw ra, 44(sp)
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB103_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a2, a1, .LBB103_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB103_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB103_3: # in Loop: Header=BB103_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB103_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -2718,6 +4949,32 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB104_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a2, a1, .LBB104_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB104_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB104_3: # in Loop: Header=BB104_1 Depth=1
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB104_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -2767,6 +5024,32 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB105_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a2, .LBB105_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB105_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB105_3: # in Loop: Header=BB105_1 Depth=1
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB105_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i16* %a, i16 %b monotonic
   ret i16 %1
 }
@@ -2819,6 +5102,32 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB106_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a2, .LBB106_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB106_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB106_3: # in Loop: Header=BB106_1 Depth=1
+; RV32IA-NEXT:    sc.w a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB106_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i16* %a, i16 %b acquire
   ret i16 %1
 }
@@ -2871,6 +5180,32 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i16_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB107_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a2, .LBB107_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB107_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB107_3: # in Loop: Header=BB107_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB107_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i16* %a, i16 %b release
   ret i16 %1
 }
@@ -2926,6 +5261,32 @@
 ; RV32I-NEXT:    lw ra, 44(sp)
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB108_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a2, .LBB108_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB108_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB108_3: # in Loop: Header=BB108_1 Depth=1
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB108_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i16* %a, i16 %b acq_rel
   ret i16 %1
 }
@@ -2978,6 +5339,32 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lui a2, 16
+; RV32IA-NEXT:    addi a2, a2, -1
+; RV32IA-NEXT:    and a1, a1, a2
+; RV32IA-NEXT:    slli a3, a0, 3
+; RV32IA-NEXT:    andi a3, a3, 24
+; RV32IA-NEXT:    sll a6, a2, a3
+; RV32IA-NEXT:    sll a1, a1, a3
+; RV32IA-NEXT:    andi a0, a0, -4
+; RV32IA-NEXT:  .LBB109_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a4, (a0)
+; RV32IA-NEXT:    and a2, a4, a6
+; RV32IA-NEXT:    mv a5, a4
+; RV32IA-NEXT:    bgeu a1, a2, .LBB109_3
+; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB109_1 Depth=1
+; RV32IA-NEXT:    xor a5, a4, a1
+; RV32IA-NEXT:    and a5, a5, a6
+; RV32IA-NEXT:    xor a5, a4, a5
+; RV32IA-NEXT:  .LBB109_3: # in Loop: Header=BB109_1 Depth=1
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a0)
+; RV32IA-NEXT:    bnez a5, .LBB109_1
+; RV32IA-NEXT:  # %bb.4:
+; RV32IA-NEXT:    srl a0, a4, a3
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i16* %a, i16 %b seq_cst
   ret i16 %1
 }
@@ -2992,6 +5379,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoswap.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3006,6 +5398,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoswap.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3020,6 +5417,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoswap.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3034,6 +5436,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoswap.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3048,6 +5455,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoswap.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3062,6 +5474,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoadd.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3076,6 +5493,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoadd.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3090,6 +5512,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoadd.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3104,6 +5531,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3118,6 +5550,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3132,6 +5569,12 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    neg a1, a1
+; RV32IA-NEXT:    amoadd.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3146,6 +5589,12 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    neg a1, a1
+; RV32IA-NEXT:    amoadd.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3160,6 +5609,12 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    neg a1, a1
+; RV32IA-NEXT:    amoadd.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3174,6 +5629,12 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    neg a1, a1
+; RV32IA-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3188,6 +5649,12 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    neg a1, a1
+; RV32IA-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3202,6 +5669,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoand.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3216,6 +5688,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoand.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3230,6 +5707,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoand.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3244,6 +5726,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoand.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3258,6 +5745,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoand.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3272,6 +5764,18 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:  .LBB130_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a2, (a0)
+; RV32IA-NEXT:    and a3, a2, a1
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB130_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3286,6 +5790,18 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:  .LBB131_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a2, (a0)
+; RV32IA-NEXT:    and a3, a2, a1
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB131_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3300,6 +5816,18 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:  .LBB132_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w a2, (a0)
+; RV32IA-NEXT:    and a3, a2, a1
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB132_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3314,6 +5842,18 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:  .LBB133_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aq a2, (a0)
+; RV32IA-NEXT:    and a3, a2, a1
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB133_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3328,6 +5868,18 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:  .LBB134_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    lr.w.aqrl a2, (a0)
+; RV32IA-NEXT:    and a3, a2, a1
+; RV32IA-NEXT:    not a3, a3
+; RV32IA-NEXT:    sc.w.aqrl a3, a3, (a0)
+; RV32IA-NEXT:    bnez a3, .LBB134_1
+; RV32IA-NEXT:  # %bb.2:
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3342,6 +5894,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoor.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3356,6 +5913,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoor.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3370,6 +5932,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoor.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3384,6 +5951,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3398,6 +5970,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3412,6 +5989,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoxor.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3426,6 +6008,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoxor.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3440,6 +6027,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoxor.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3454,6 +6046,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3468,6 +6065,11 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3508,6 +6110,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomax.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3551,6 +6158,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomax.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3594,6 +6206,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomax.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3640,6 +6257,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomax.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3683,6 +6305,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomax.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3723,6 +6350,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomin.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3766,6 +6398,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomin.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -3809,6 +6446,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomin.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i32* %a, i32 %b release
   ret i32 %1
 }
@@ -3855,6 +6497,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomin.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -3898,6 +6545,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomin.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -3938,6 +6590,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomaxu.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -3981,6 +6638,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomaxu.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -4024,6 +6686,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomaxu.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i32* %a, i32 %b release
   ret i32 %1
 }
@@ -4070,6 +6737,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomaxu.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -4113,6 +6785,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amomaxu.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -4153,6 +6830,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amominu.w a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i32* %a, i32 %b monotonic
   ret i32 %1
 }
@@ -4196,6 +6878,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i32_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amominu.w.aq a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i32* %a, i32 %b acquire
   ret i32 %1
 }
@@ -4239,6 +6926,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i32_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amominu.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i32* %a, i32 %b release
   ret i32 %1
 }
@@ -4285,6 +6977,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i32_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amominu.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i32* %a, i32 %b acq_rel
   ret i32 %1
 }
@@ -4328,6 +7025,11 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amominu.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i32* %a, i32 %b seq_cst
   ret i32 %1
 }
@@ -4342,6 +7044,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    mv a3, zero
+; RV32IA-NEXT:    call __atomic_exchange_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -4356,6 +7068,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 2
+; RV32IA-NEXT:    call __atomic_exchange_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -4370,6 +7092,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 3
+; RV32IA-NEXT:    call __atomic_exchange_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i64* %a, i64 %b release
   ret i64 %1
 }
@@ -4384,6 +7116,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 4
+; RV32IA-NEXT:    call __atomic_exchange_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -4398,6 +7140,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 5
+; RV32IA-NEXT:    call __atomic_exchange_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xchg i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -4412,6 +7164,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    mv a3, zero
+; RV32IA-NEXT:    call __atomic_fetch_add_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -4426,6 +7188,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 2
+; RV32IA-NEXT:    call __atomic_fetch_add_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -4440,6 +7212,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 3
+; RV32IA-NEXT:    call __atomic_fetch_add_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i64* %a, i64 %b release
   ret i64 %1
 }
@@ -4454,6 +7236,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 4
+; RV32IA-NEXT:    call __atomic_fetch_add_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -4468,6 +7260,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_add_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 5
+; RV32IA-NEXT:    call __atomic_fetch_add_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw add i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -4482,6 +7284,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    mv a3, zero
+; RV32IA-NEXT:    call __atomic_fetch_sub_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -4496,6 +7308,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 2
+; RV32IA-NEXT:    call __atomic_fetch_sub_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -4510,6 +7332,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 3
+; RV32IA-NEXT:    call __atomic_fetch_sub_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i64* %a, i64 %b release
   ret i64 %1
 }
@@ -4524,6 +7356,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 4
+; RV32IA-NEXT:    call __atomic_fetch_sub_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -4538,6 +7380,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 5
+; RV32IA-NEXT:    call __atomic_fetch_sub_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw sub i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -4552,6 +7404,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    mv a3, zero
+; RV32IA-NEXT:    call __atomic_fetch_and_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -4566,6 +7428,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 2
+; RV32IA-NEXT:    call __atomic_fetch_and_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -4580,6 +7452,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 3
+; RV32IA-NEXT:    call __atomic_fetch_and_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i64* %a, i64 %b release
   ret i64 %1
 }
@@ -4594,6 +7476,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 4
+; RV32IA-NEXT:    call __atomic_fetch_and_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -4608,6 +7500,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_and_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 5
+; RV32IA-NEXT:    call __atomic_fetch_and_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw and i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -4622,6 +7524,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    mv a3, zero
+; RV32IA-NEXT:    call __atomic_fetch_nand_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -4636,6 +7548,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 2
+; RV32IA-NEXT:    call __atomic_fetch_nand_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -4650,6 +7572,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 3
+; RV32IA-NEXT:    call __atomic_fetch_nand_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i64* %a, i64 %b release
   ret i64 %1
 }
@@ -4664,6 +7596,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 4
+; RV32IA-NEXT:    call __atomic_fetch_nand_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -4678,6 +7620,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_nand_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 5
+; RV32IA-NEXT:    call __atomic_fetch_nand_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw nand i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -4692,6 +7644,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    mv a3, zero
+; RV32IA-NEXT:    call __atomic_fetch_or_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -4706,6 +7668,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 2
+; RV32IA-NEXT:    call __atomic_fetch_or_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -4720,6 +7692,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 3
+; RV32IA-NEXT:    call __atomic_fetch_or_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i64* %a, i64 %b release
   ret i64 %1
 }
@@ -4734,6 +7716,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 4
+; RV32IA-NEXT:    call __atomic_fetch_or_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -4748,6 +7740,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_or_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 5
+; RV32IA-NEXT:    call __atomic_fetch_or_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw or i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -4762,6 +7764,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    mv a3, zero
+; RV32IA-NEXT:    call __atomic_fetch_xor_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -4776,6 +7788,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 2
+; RV32IA-NEXT:    call __atomic_fetch_xor_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -4790,6 +7812,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 3
+; RV32IA-NEXT:    call __atomic_fetch_xor_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i64* %a, i64 %b release
   ret i64 %1
 }
@@ -4804,6 +7836,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 4
+; RV32IA-NEXT:    call __atomic_fetch_xor_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -4818,6 +7860,16 @@
 ; RV32I-NEXT:    lw ra, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp)
+; RV32IA-NEXT:    addi a3, zero, 5
+; RV32IA-NEXT:    call __atomic_fetch_xor_8
+; RV32IA-NEXT:    lw ra, 12(sp)
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
   %1 = atomicrmw xor i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -4880,6 +7932,64 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:  .LBB200_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB200_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB200_4
+; RV32IA-NEXT:    j .LBB200_5
+; RV32IA-NEXT:  .LBB200_3: # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB200_5
+; RV32IA-NEXT:  .LBB200_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB200_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB200_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB200_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, zero
+; RV32IA-NEXT:    mv a5, zero
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB200_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -4945,6 +8055,67 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 2
+; RV32IA-NEXT:  .LBB201_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB201_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB201_4
+; RV32IA-NEXT:    j .LBB201_5
+; RV32IA-NEXT:  .LBB201_3: # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB201_5
+; RV32IA-NEXT:  .LBB201_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB201_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB201_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB201_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s5
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB201_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -5010,6 +8181,67 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 3
+; RV32IA-NEXT:  .LBB202_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB202_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB202_4
+; RV32IA-NEXT:    j .LBB202_5
+; RV32IA-NEXT:  .LBB202_3: # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB202_5
+; RV32IA-NEXT:  .LBB202_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB202_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB202_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB202_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, zero
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB202_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i64* %a, i64 %b release
   ret i64 %1
 }
@@ -5078,6 +8310,70 @@
 ; RV32I-NEXT:    lw ra, 44(sp)
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp)
+; RV32IA-NEXT:    sw s1, 40(sp)
+; RV32IA-NEXT:    sw s2, 36(sp)
+; RV32IA-NEXT:    sw s3, 32(sp)
+; RV32IA-NEXT:    sw s4, 28(sp)
+; RV32IA-NEXT:    sw s5, 24(sp)
+; RV32IA-NEXT:    sw s6, 20(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    addi s4, sp, 8
+; RV32IA-NEXT:    addi s5, zero, 4
+; RV32IA-NEXT:    addi s6, zero, 2
+; RV32IA-NEXT:  .LBB203_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB203_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    sw a2, 8(sp)
+; RV32IA-NEXT:    beqz a0, .LBB203_4
+; RV32IA-NEXT:    j .LBB203_5
+; RV32IA-NEXT:  .LBB203_3: # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 8(sp)
+; RV32IA-NEXT:    bnez a0, .LBB203_5
+; RV32IA-NEXT:  .LBB203_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB203_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB203_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB203_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:    sw a1, 12(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s6
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 12(sp)
+; RV32IA-NEXT:    lw a2, 8(sp)
+; RV32IA-NEXT:    beqz a0, .LBB203_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s6, 20(sp)
+; RV32IA-NEXT:    lw s5, 24(sp)
+; RV32IA-NEXT:    lw s4, 28(sp)
+; RV32IA-NEXT:    lw s3, 32(sp)
+; RV32IA-NEXT:    lw s2, 36(sp)
+; RV32IA-NEXT:    lw s1, 40(sp)
+; RV32IA-NEXT:    lw ra, 44(sp)
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -5143,6 +8439,67 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_max_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 5
+; RV32IA-NEXT:  .LBB204_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB204_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB204_4
+; RV32IA-NEXT:    j .LBB204_5
+; RV32IA-NEXT:  .LBB204_3: # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB204_5
+; RV32IA-NEXT:  .LBB204_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB204_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB204_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB204_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s5
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB204_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw max i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -5206,6 +8563,65 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:  .LBB205_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB205_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    j .LBB205_4
+; RV32IA-NEXT:  .LBB205_3: # in Loop: Header=BB205_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB205_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB205_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB205_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB205_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB205_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, zero
+; RV32IA-NEXT:    mv a5, zero
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB205_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -5272,6 +8688,68 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 2
+; RV32IA-NEXT:  .LBB206_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB206_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    j .LBB206_4
+; RV32IA-NEXT:  .LBB206_3: # in Loop: Header=BB206_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB206_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB206_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB206_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB206_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB206_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s5
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB206_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -5338,6 +8816,68 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 3
+; RV32IA-NEXT:  .LBB207_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB207_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    j .LBB207_4
+; RV32IA-NEXT:  .LBB207_3: # in Loop: Header=BB207_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB207_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB207_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB207_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB207_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB207_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, zero
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB207_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i64* %a, i64 %b release
   ret i64 %1
 }
@@ -5407,6 +8947,71 @@
 ; RV32I-NEXT:    lw ra, 44(sp)
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp)
+; RV32IA-NEXT:    sw s1, 40(sp)
+; RV32IA-NEXT:    sw s2, 36(sp)
+; RV32IA-NEXT:    sw s3, 32(sp)
+; RV32IA-NEXT:    sw s4, 28(sp)
+; RV32IA-NEXT:    sw s5, 24(sp)
+; RV32IA-NEXT:    sw s6, 20(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    addi s4, sp, 8
+; RV32IA-NEXT:    addi s5, zero, 4
+; RV32IA-NEXT:    addi s6, zero, 2
+; RV32IA-NEXT:  .LBB208_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB208_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    j .LBB208_4
+; RV32IA-NEXT:  .LBB208_3: # in Loop: Header=BB208_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB208_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 8(sp)
+; RV32IA-NEXT:    bnez a0, .LBB208_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB208_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB208_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB208_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
+; RV32IA-NEXT:    sw a1, 12(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s6
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 12(sp)
+; RV32IA-NEXT:    lw a2, 8(sp)
+; RV32IA-NEXT:    beqz a0, .LBB208_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s6, 20(sp)
+; RV32IA-NEXT:    lw s5, 24(sp)
+; RV32IA-NEXT:    lw s4, 28(sp)
+; RV32IA-NEXT:    lw s3, 32(sp)
+; RV32IA-NEXT:    lw s2, 36(sp)
+; RV32IA-NEXT:    lw s1, 40(sp)
+; RV32IA-NEXT:    lw ra, 44(sp)
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -5473,6 +9078,68 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_min_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 5
+; RV32IA-NEXT:  .LBB209_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB209_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV32IA-NEXT:    slt a0, s1, a1
+; RV32IA-NEXT:    j .LBB209_4
+; RV32IA-NEXT:  .LBB209_3: # in Loop: Header=BB209_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB209_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB209_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB209_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB209_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB209_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s5
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB209_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw min i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -5535,6 +9202,64 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:  .LBB210_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB210_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB210_4
+; RV32IA-NEXT:    j .LBB210_5
+; RV32IA-NEXT:  .LBB210_3: # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB210_5
+; RV32IA-NEXT:  .LBB210_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB210_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB210_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB210_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, zero
+; RV32IA-NEXT:    mv a5, zero
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB210_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -5600,6 +9325,67 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 2
+; RV32IA-NEXT:  .LBB211_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB211_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB211_4
+; RV32IA-NEXT:    j .LBB211_5
+; RV32IA-NEXT:  .LBB211_3: # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB211_5
+; RV32IA-NEXT:  .LBB211_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB211_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB211_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB211_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s5
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB211_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -5665,6 +9451,67 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 3
+; RV32IA-NEXT:  .LBB212_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB212_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB212_4
+; RV32IA-NEXT:    j .LBB212_5
+; RV32IA-NEXT:  .LBB212_3: # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB212_5
+; RV32IA-NEXT:  .LBB212_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB212_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB212_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB212_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, zero
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB212_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i64* %a, i64 %b release
   ret i64 %1
 }
@@ -5733,6 +9580,70 @@
 ; RV32I-NEXT:    lw ra, 44(sp)
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp)
+; RV32IA-NEXT:    sw s1, 40(sp)
+; RV32IA-NEXT:    sw s2, 36(sp)
+; RV32IA-NEXT:    sw s3, 32(sp)
+; RV32IA-NEXT:    sw s4, 28(sp)
+; RV32IA-NEXT:    sw s5, 24(sp)
+; RV32IA-NEXT:    sw s6, 20(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    addi s4, sp, 8
+; RV32IA-NEXT:    addi s5, zero, 4
+; RV32IA-NEXT:    addi s6, zero, 2
+; RV32IA-NEXT:  .LBB213_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB213_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    sw a2, 8(sp)
+; RV32IA-NEXT:    beqz a0, .LBB213_4
+; RV32IA-NEXT:    j .LBB213_5
+; RV32IA-NEXT:  .LBB213_3: # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 8(sp)
+; RV32IA-NEXT:    bnez a0, .LBB213_5
+; RV32IA-NEXT:  .LBB213_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB213_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB213_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB213_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:    sw a1, 12(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s6
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 12(sp)
+; RV32IA-NEXT:    lw a2, 8(sp)
+; RV32IA-NEXT:    beqz a0, .LBB213_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s6, 20(sp)
+; RV32IA-NEXT:    lw s5, 24(sp)
+; RV32IA-NEXT:    lw s4, 28(sp)
+; RV32IA-NEXT:    lw s3, 32(sp)
+; RV32IA-NEXT:    lw s2, 36(sp)
+; RV32IA-NEXT:    lw s1, 40(sp)
+; RV32IA-NEXT:    lw ra, 44(sp)
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -5798,6 +9709,67 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umax_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 5
+; RV32IA-NEXT:  .LBB214_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB214_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB214_4
+; RV32IA-NEXT:    j .LBB214_5
+; RV32IA-NEXT:  .LBB214_3: # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB214_5
+; RV32IA-NEXT:  .LBB214_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB214_5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB214_7
+; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB214_7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s5
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB214_1
+; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umax i64* %a, i64 %b seq_cst
   ret i64 %1
 }
@@ -5861,6 +9833,65 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:  .LBB215_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB215_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    j .LBB215_4
+; RV32IA-NEXT:  .LBB215_3: # in Loop: Header=BB215_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB215_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB215_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB215_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB215_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB215_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, zero
+; RV32IA-NEXT:    mv a5, zero
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB215_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i64* %a, i64 %b monotonic
   ret i64 %1
 }
@@ -5927,6 +9958,68 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 2
+; RV32IA-NEXT:  .LBB216_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB216_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    j .LBB216_4
+; RV32IA-NEXT:  .LBB216_3: # in Loop: Header=BB216_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB216_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB216_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB216_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB216_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB216_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s5
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB216_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i64* %a, i64 %b acquire
   ret i64 %1
 }
@@ -5993,6 +10086,68 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i64_release:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 3
+; RV32IA-NEXT:  .LBB217_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB217_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    j .LBB217_4
+; RV32IA-NEXT:  .LBB217_3: # in Loop: Header=BB217_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB217_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB217_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB217_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB217_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB217_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, zero
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB217_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i64* %a, i64 %b release
   ret i64 %1
 }
@@ -6062,6 +10217,71 @@
 ; RV32I-NEXT:    lw ra, 44(sp)
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i64_acq_rel:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -48
+; RV32IA-NEXT:    sw ra, 44(sp)
+; RV32IA-NEXT:    sw s1, 40(sp)
+; RV32IA-NEXT:    sw s2, 36(sp)
+; RV32IA-NEXT:    sw s3, 32(sp)
+; RV32IA-NEXT:    sw s4, 28(sp)
+; RV32IA-NEXT:    sw s5, 24(sp)
+; RV32IA-NEXT:    sw s6, 20(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    addi s4, sp, 8
+; RV32IA-NEXT:    addi s5, zero, 4
+; RV32IA-NEXT:    addi s6, zero, 2
+; RV32IA-NEXT:  .LBB218_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB218_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    j .LBB218_4
+; RV32IA-NEXT:  .LBB218_3: # in Loop: Header=BB218_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB218_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 8(sp)
+; RV32IA-NEXT:    bnez a0, .LBB218_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB218_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB218_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB218_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
+; RV32IA-NEXT:    sw a1, 12(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s6
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 12(sp)
+; RV32IA-NEXT:    lw a2, 8(sp)
+; RV32IA-NEXT:    beqz a0, .LBB218_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s6, 20(sp)
+; RV32IA-NEXT:    lw s5, 24(sp)
+; RV32IA-NEXT:    lw s4, 28(sp)
+; RV32IA-NEXT:    lw s3, 32(sp)
+; RV32IA-NEXT:    lw s2, 36(sp)
+; RV32IA-NEXT:    lw s1, 40(sp)
+; RV32IA-NEXT:    lw ra, 44(sp)
+; RV32IA-NEXT:    addi sp, sp, 48
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i64* %a, i64 %b acq_rel
   ret i64 %1
 }
@@ -6128,6 +10348,68 @@
 ; RV32I-NEXT:    lw ra, 28(sp)
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_umin_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -32
+; RV32IA-NEXT:    sw ra, 28(sp)
+; RV32IA-NEXT:    sw s1, 24(sp)
+; RV32IA-NEXT:    sw s2, 20(sp)
+; RV32IA-NEXT:    sw s3, 16(sp)
+; RV32IA-NEXT:    sw s4, 12(sp)
+; RV32IA-NEXT:    sw s5, 8(sp)
+; RV32IA-NEXT:    mv s1, a2
+; RV32IA-NEXT:    mv s2, a1
+; RV32IA-NEXT:    mv s3, a0
+; RV32IA-NEXT:    lw a1, 4(a0)
+; RV32IA-NEXT:    lw a2, 0(a0)
+; RV32IA-NEXT:    mv s4, sp
+; RV32IA-NEXT:    addi s5, zero, 5
+; RV32IA-NEXT:  .LBB219_1: # %atomicrmw.start
+; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT:    beq a1, s1, .LBB219_3
+; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s1, a1
+; RV32IA-NEXT:    j .LBB219_4
+; RV32IA-NEXT:  .LBB219_3: # in Loop: Header=BB219_1 Depth=1
+; RV32IA-NEXT:    sltu a0, s2, a2
+; RV32IA-NEXT:  .LBB219_4: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV32IA-NEXT:    xori a0, a0, 1
+; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    bnez a0, .LBB219_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
+; RV32IA-NEXT:  .LBB219_6: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV32IA-NEXT:    mv a3, a1
+; RV32IA-NEXT:    bnez a0, .LBB219_8
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV32IA-NEXT:    mv a3, s1
+; RV32IA-NEXT:  .LBB219_8: # %atomicrmw.start
+; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
+; RV32IA-NEXT:    sw a1, 4(sp)
+; RV32IA-NEXT:    mv a0, s3
+; RV32IA-NEXT:    mv a1, s4
+; RV32IA-NEXT:    mv a4, s5
+; RV32IA-NEXT:    mv a5, s5
+; RV32IA-NEXT:    call __atomic_compare_exchange_8
+; RV32IA-NEXT:    lw a1, 4(sp)
+; RV32IA-NEXT:    lw a2, 0(sp)
+; RV32IA-NEXT:    beqz a0, .LBB219_1
+; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:    mv a0, a2
+; RV32IA-NEXT:    lw s5, 8(sp)
+; RV32IA-NEXT:    lw s4, 12(sp)
+; RV32IA-NEXT:    lw s3, 16(sp)
+; RV32IA-NEXT:    lw s2, 20(sp)
+; RV32IA-NEXT:    lw s1, 24(sp)
+; RV32IA-NEXT:    lw ra, 28(sp)
+; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    ret
   %1 = atomicrmw umin i64* %a, i64 %b seq_cst
   ret i64 %1
 }