RegAllocGreedy: Allow last chance recolor to retry overlapping tuples

Last chance recoloring didn't try recoloring a done register with the same class since it believed there was no point. This doesn't necessarily apply if the members in that class overlap. Allow the recoloring to proceed if the assigned interfering physical register overlaps with the candidate register. This avoids an allocation failure with overlapping tuples.

This testcase could be handled better, and I don't believe it should reach last chance recoloring. The failure only manifests with the mutually unsatisfiable register hints to overlapping tuples. The earlier assignment decisions probably should have figured out that using these hints was a bad idea.

GitOrigin-RevId: 7714e0317520207572168388f22012dd9e152e9e

commit: e6882a7582ab0ac5aacf14e7f1a74bba703fd678 [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Fri Apr 01 09:59:05 2022 -0400
committer: Copybara-Service <copybara-worker@google.com> Mon Apr 25 14:12:19 2022 -0700
tree: 403c085db3ee8b7c094d0d8bb188a45dd90b5a73
parent: 64c10e29856ab3015b15c6cde3078184fca1506f [diff] [blame]
diff --git a/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir b/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
new file mode 100644
index 0000000..09be927
--- /dev/null
+++ b/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir

@@ -0,0 +1,84 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=greedy -o - %s | FileCheck %s
+
+# This testcase is restricted to use a maximum of 24 VGPRs. It is
+# therefore possible to allocate a maximum of 3 vreg_256s at a
+# time. The apparent number of registers in the class is larger, but
+# each one overlaps with the next. Allocating a vreg_64 will prevent a
+# full vreg_256 from being live at a given point.
+
+# The hints are trying to force allocation of overlapping vreg_256s
+# which cannot be satisfied. The last S_NOP in %bb.0 with 2 vreg_256s
+# and a vreg_64 use can be satisfied as long as the hints are ignored.
+
+# With the resulting allocation order, this ends up using last chance
+# recoloring for a vreg_256. We should try to recolor for completed
+# virtual registers with the same class, since the existing assignment
+# can only be corrected by adjusting to a non-overlapping register.
+
+--- |
+  define void @recolor_impossible_hint() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+---
+
+---
+name:            recolor_impossible_hint
+alignment:       1
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_256, preferred-register: '$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7' }
+  - { id: 1, class: vreg_256, preferred-register: '$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8' }
+  - { id: 2, class: vreg_256, preferred-register: '$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9' }
+  - { id: 3, class: vreg_256, preferred-register: '$vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10' }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       10
+body:             |
+  ; CHECK-LABEL: name: recolor_impossible_hint
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %7, implicit-def %19, implicit-def %5
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %19, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %17
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %17, %stack.2, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.2, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %4
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], implicit [[SI_SPILL_V256_RESTORE1]], implicit %4
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY]]
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]]
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE3]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_256, implicit-def %1:vreg_256, implicit-def %2:vreg_256
+    S_NOP 0, implicit-def %3:vreg_256
+    S_NOP 0, implicit-def %4:vreg_64
+    S_NOP 0, implicit %0, implicit %1, implicit %4
+    S_CBRANCH_EXECNZ %bb.3, implicit $exec
+
+  bb.2:
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+
+  bb.3:
+    S_ENDPGM 0
+
+...
commit	e6882a7582ab0ac5aacf14e7f1a74bba703fd678	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Fri Apr 01 09:59:05 2022 -0400
committer	Copybara-Service <copybara-worker@google.com>	Mon Apr 25 14:12:19 2022 -0700
tree	403c085db3ee8b7c094d0d8bb188a45dd90b5a73
parent	64c10e29856ab3015b15c6cde3078184fca1506f [diff] [blame]