; RUN: llc -O0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VGPR %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VMEM %s

; A <2 x i32> SGPR pair defined before a branch and used after it must be
; spilled/restored: to VGPR lanes by default (VGPR), or to scratch memory
; when -amdgpu-spill-sgpr-to-vgpr=0 (VMEM).
; GCN-LABEL: {{^}}spill_sgpr_x2:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; Same as spill_sgpr_x2 but for a <3 x i32> SGPR tuple (3 lanes / 3 dwords).
; GCN-LABEL: {{^}}spill_sgpr_x3:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x3(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <3 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<3 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; Same as spill_sgpr_x2 but for a <4 x i32> SGPR tuple (4 lanes / 4 dwords).
; GCN-LABEL: {{^}}spill_sgpr_x4:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; Same as spill_sgpr_x2 but for a <5 x i32> SGPR tuple (5 lanes / 5 dwords).
; GCN-LABEL: {{^}}spill_sgpr_x5:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <5 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; Same as spill_sgpr_x2 but for an <8 x i32> SGPR tuple (8 lanes / 8 dwords).
; GCN-LABEL: {{^}}spill_sgpr_x8:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; Same as spill_sgpr_x2 but for a <16 x i32> SGPR tuple (16 lanes / 16 dwords).
; GCN-LABEL: {{^}}spill_sgpr_x16:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; Same as spill_sgpr_x2 but for a <32 x i32> SGPR tuple (32 lanes / 32 dwords).
; GCN-LABEL: {{^}}spill_sgpr_x32:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 16
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 17
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 18
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 19
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 20
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 21
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 22
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 23
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 24
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 25
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 26
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 27
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 28
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 29
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 30
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 31
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 16
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 17
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 18
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 19
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 20
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 21
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 22
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 23
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 24
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 25
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 26
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 27
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 28
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 29
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 30
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 31

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x32(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

attributes #0 = { nounwind }