AMDGPU: Optimize s_setreg_b32 to s_denorm_mode/s_round_mode

This is implemented as a custom inserter because that was less work
than teaching tablegen a way to indicate that it is sometimes OK to have
an instruction with no side effects in the output of a side-effecting
pattern.

The inline asm in the tests is needed to look like a read of the mode
register, to prevent the mode register def from being deleted. However,
there seems to be a bug where the mode register def instructions are
moved across the asm sideeffect by the post-RA scheduler.

Another oddity is that the immediate is formatted differently between
s_denorm_mode and s_round_mode (e.g. "s_denorm_mode 15" is printed in
decimal while "s_round_mode 0xf" is printed in hex).
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 23ef56a..c8d1542 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -366,6 +366,28 @@
   WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
 };
 
+enum ModeRegisterMasks : uint32_t { // Bit masks for fields of the MODE hwreg.
+  FP_ROUND_MASK = 0xf << 0,  // Bits 0..3
+  FP_DENORM_MASK = 0xf << 4, // Bits 4..7
+  DX10_CLAMP_MASK = 1 << 8,  // Bit 8
+  IEEE_MODE_MASK = 1 << 9,   // Bit 9
+  LOD_CLAMP_MASK = 1 << 10,  // Bit 10
+  DEBUG_MASK = 1 << 11,      // Bit 11
+
+  // EXCP_EN fields.
+  EXCP_EN_INVALID_MASK = 1 << 12,        // Bit 12
+  EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13, // Bit 13
+  EXCP_EN_FLOAT_DIV0_MASK = 1 << 14,     // Bit 14
+  EXCP_EN_OVERFLOW_MASK = 1 << 15,       // Bit 15
+  EXCP_EN_UNDERFLOW_MASK = 1 << 16,      // Bit 16
+  EXCP_EN_INEXACT_MASK = 1 << 17,        // Bit 17
+  EXCP_EN_INT_DIV0_MASK = 1 << 18,       // Bit 18
+
+  GPR_IDX_EN_MASK = 1 << 27, // Bit 27
+  VSKIP_MASK = 1 << 28,      // Bit 28
+  CSP_MASK = 0x7u << 29 // Bits 29..31
+};
+
 } // namespace Hwreg
 
 namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 619ce1a..452ff78 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4119,6 +4119,75 @@
     }
 
     return emitGWSMemViolTestLoop(MI, BB);
+  case AMDGPU::S_SETREG_B32: {
+    if (!getSubtarget()->hasDenormModeInst())
+      return BB;
+
+    // Try to optimize cases that only set the denormal mode or rounding mode.
+    //
+    // If the s_setreg_b32 fully sets all of the bits in the rounding mode or
+    // denormal mode to a constant, we can use s_round_mode or s_denorm_mode
+    // instead.
+    //
+    // FIXME: This could be predicates on the immediate, but tablegen doesn't
+    // allow you to have a no side effect instruction in the output of a
+    // sideeffecting pattern.
+
+    // TODO: Should also emit a no side effects pseudo if only FP bits are
+    // touched, even if not all of them or to a variable.
+    unsigned ID, Offset, Width;
+    AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
+    if (ID != AMDGPU::Hwreg::ID_MODE)
+      return BB;
+
+    const unsigned WidthMask = maskTrailingOnes<unsigned>(Width);
+    const unsigned SetMask = WidthMask << Offset;
+    unsigned SetDenormOp = 0;
+    unsigned SetRoundOp = 0;
+
+    // The dedicated instructions can only set the whole denorm or round mode at
+    // once, not a subset of bits in either.
+    if (Width == 8 && (SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
+                                  AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask) {
+      // If this fully sets both the round and denorm mode, emit the two
+      // dedicated instructions for these.
+      assert(Offset == 0);
+      SetRoundOp = AMDGPU::S_ROUND_MODE;
+      SetDenormOp = AMDGPU::S_DENORM_MODE;
+    } else if (Width == 4) {
+      if ((SetMask & AMDGPU::Hwreg::FP_ROUND_MASK) == SetMask) {
+        SetRoundOp = AMDGPU::S_ROUND_MODE;
+        assert(Offset == 0);
+      } else if ((SetMask & AMDGPU::Hwreg::FP_DENORM_MASK) == SetMask) {
+        SetDenormOp = AMDGPU::S_DENORM_MODE;
+        assert(Offset == 4);
+      }
+    }
+
+    if (SetRoundOp || SetDenormOp) {
+      MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+      MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
+      if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
+        unsigned ImmVal = Def->getOperand(1).getImm();
+        if (SetRoundOp) {
+          BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
+            .addImm(ImmVal & 0xf);
+
+          // If we also have the denorm mode, get just the denorm mode bits.
+          ImmVal >>= 4;
+        }
+
+        if (SetDenormOp) {
+          BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
+            .addImm(ImmVal & 0xf);
+        }
+
+        MI.eraseFromParent();
+      }
+    }
+
+    return BB;
+  }
   default:
     return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   }
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index dbafea5..774b9cf 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -808,6 +808,10 @@
   (outs), (ins SReg_32:$sdst, hwreg:$simm16),
   "$simm16, $sdst",
   [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+
+  // Use custom inserter to optimize some cases to
+  // S_DENORM_MODE/S_ROUND_MODE.
+  let usesCustomInserter = 1;
   let Defs = [MODE];
   let Uses = [MODE];
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
index 72de32e..531495c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
@@ -309,7 +309,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -329,7 +329,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
+; GFX10-NEXT:    s_round_mode 0x1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -349,7 +349,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
+; GFX10-NEXT:    s_round_mode 0x2
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -369,7 +369,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_4:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
+; GFX10-NEXT:    s_round_mode 0x4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -389,7 +389,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
+; GFX10-NEXT:    s_round_mode 0x8
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -409,7 +409,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_15:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
+; GFX10-NEXT:    s_round_mode 0xf
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -430,7 +430,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_42:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42
+; GFX10-NEXT:    s_round_mode 0xa
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -450,7 +450,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -470,7 +470,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1
+; GFX10-NEXT:    s_denorm_mode 1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -491,7 +491,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2
+; GFX10-NEXT:    s_denorm_mode 2
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -511,7 +511,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_4:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4
+; GFX10-NEXT:    s_denorm_mode 4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -531,7 +531,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8
+; GFX10-NEXT:    s_denorm_mode 8
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -551,7 +551,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_15:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15
+; GFX10-NEXT:    s_denorm_mode 15
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -571,7 +571,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_42:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42
+; GFX10-NEXT:    s_denorm_mode 10
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -591,10 +591,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 0)
   call void asm sideeffect "", ""()
@@ -611,10 +612,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 1)
   call void asm sideeffect "", ""()
@@ -631,10 +633,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x2
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 2)
   call void asm sideeffect "", ""()
@@ -651,10 +654,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x4
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 4)
   call void asm sideeffect "", ""()
@@ -671,10 +675,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x8
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 8)
   call void asm sideeffect "", ""()
@@ -691,10 +696,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 1
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 16)
   call void asm sideeffect "", ""()
@@ -711,10 +717,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 2
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 32)
   call void asm sideeffect "", ""()
@@ -731,10 +738,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 4
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 64)
   call void asm sideeffect "", ""()
@@ -751,10 +759,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 8
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 128)
   call void asm sideeffect "", ""()
@@ -771,10 +780,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0xf
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 15)
   call void asm sideeffect "", ""()
@@ -791,10 +801,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0xf
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 15
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 255)
   call void asm sideeffect "", ""()
@@ -812,10 +823,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x5
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 5
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 597)
   call void asm sideeffect "", ""()
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
index 934e39e..515b41d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -309,7 +309,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -329,7 +329,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
+; GFX10-NEXT:    s_round_mode 0x1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -349,7 +349,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
+; GFX10-NEXT:    s_round_mode 0x2
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -369,7 +369,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_4:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
+; GFX10-NEXT:    s_round_mode 0x4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -389,7 +389,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
+; GFX10-NEXT:    s_round_mode 0x8
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -409,7 +409,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_15:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
+; GFX10-NEXT:    s_round_mode 0xf
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -430,7 +430,7 @@
 ; GFX10-LABEL: test_setreg_full_round_mode_42:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42
+; GFX10-NEXT:    s_round_mode 0xa
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -450,7 +450,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -470,7 +470,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1
+; GFX10-NEXT:    s_denorm_mode 1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -491,7 +491,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2
+; GFX10-NEXT:    s_denorm_mode 2
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -511,7 +511,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_4:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4
+; GFX10-NEXT:    s_denorm_mode 4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -531,7 +531,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8
+; GFX10-NEXT:    s_denorm_mode 8
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -551,7 +551,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_15:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15
+; GFX10-NEXT:    s_denorm_mode 15
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -571,7 +571,7 @@
 ; GFX10-LABEL: test_setreg_full_denorm_mode_42:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42
+; GFX10-NEXT:    s_denorm_mode 10
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -591,10 +591,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 0)
   call void asm sideeffect "", ""()
@@ -611,10 +612,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 1)
   call void asm sideeffect "", ""()
@@ -631,10 +633,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x2
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 2)
   call void asm sideeffect "", ""()
@@ -651,10 +654,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x4
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 4)
   call void asm sideeffect "", ""()
@@ -671,10 +675,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x8
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 8)
   call void asm sideeffect "", ""()
@@ -691,10 +696,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 1
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 16)
   call void asm sideeffect "", ""()
@@ -711,10 +717,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 2
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 32)
   call void asm sideeffect "", ""()
@@ -731,10 +738,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 4
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 64)
   call void asm sideeffect "", ""()
@@ -751,10 +759,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 8
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 128)
   call void asm sideeffect "", ""()
@@ -771,10 +780,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0xf
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 0
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 15)
   call void asm sideeffect "", ""()
@@ -791,10 +801,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0xf
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 15
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 255)
   call void asm sideeffect "", ""()
@@ -812,10 +823,11 @@
 ;
 ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597:
 ; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_round_mode 0x5
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
+; GFX10-NEXT:    s_denorm_mode 5
 ; GFX10-NEXT:    s_endpgm
   call void @llvm.amdgcn.s.setreg(i32 14337, i32 597)
   call void asm sideeffect "", ""()