llvm/test/CodeGen/AMDGPU/inline-asm.ll - llvm-project - Git at Google

 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck  --check-prefix=CHECK %s

 ; CHECK-LABEL: {{^}}inline_asm:
 ; CHECK: s_endpgm
 ; CHECK: s_endpgm
 define amdgpu_kernel void @inline_asm(ptr addrspace(1) %out) {
 entry:
   store i32 5, ptr addrspace(1) %out
   call void asm sideeffect "s_endpgm", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}inline_asm_shader:
 ; CHECK: s_endpgm
 ; CHECK: s_endpgm
 define amdgpu_ps void @inline_asm_shader() {
 entry:
   call void asm sideeffect "s_endpgm", ""()
   ret void
 }


 ; CHECK-LABEL: {{^}}branch_on_asm_vgpr:
 ; Make sure VGPR inline assembly is treated as divergent.
 ; CHECK: v_mov_b32 v{{[0-9]+}}, 0
 ; CHECK: v_cmp_eq_u32
 ; CHECK: s_and_saveexec_b64
 define amdgpu_kernel void @branch_on_asm_vgpr(ptr addrspace(1) %out) {
 	%zero = call i32 asm "v_mov_b32 $0, 0", "=v"()
 	%cmp = icmp eq i32 %zero, 0
 	br i1 %cmp, label %if, label %endif

 if:
 	store i32 0, ptr addrspace(1) %out
 	br label %endif

 endif:
   ret void
 }

 ; CHECK-LABEL: {{^}}branch_on_asm_sgpr:
 ; Make sure SGPR inline assembly is treated as uniform
 ; CHECK: s_mov_b32 s{{[0-9]+}}, 0
 ; CHECK: s_cmp_lg_u32
 ; CHECK: s_cbranch_scc0
 define amdgpu_kernel void @branch_on_asm_sgpr(ptr addrspace(1) %out) {
 	%zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
 	%cmp = icmp eq i32 %zero, 0
 	br i1 %cmp, label %if, label %endif

 if:
 	store i32 0, ptr addrspace(1) %out
 	br label %endif

 endif:
   ret void
 }

 ; CHECK-LABEL: {{^}}v_cmp_asm:
 ; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
 ; CHECK: v_cmp_ne_u32_e64 s[[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]], 0, [[SRC]]
 ; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
 ; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
 ; CHECK: buffer_store_dwordx2 v[[[V_LO]]:[[V_HI]]]
 define amdgpu_kernel void @v_cmp_asm(ptr addrspace(1) %out, i32 %in) {
   %sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
   store i64 %sgpr, ptr addrspace(1) %out
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm:
 ; CHECK: codeLenInByte = 12
 define amdgpu_kernel void @code_size_inline_asm(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "v_nop_e64", ""()
   ret void
 }

 ; All inlineasm instructions are assumed to be the maximum size
 ; CHECK-LABEL: {{^}}code_size_inline_asm_small_inst:
 ; CHECK: codeLenInByte = 12
 define amdgpu_kernel void @code_size_inline_asm_small_inst(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "v_nop_e32", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst:
 ; CHECK: codeLenInByte = 20
 define amdgpu_kernel void @code_size_inline_asm_2_inst(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "
     v_nop_e64
     v_nop_e64
    ", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst_extra_newline:
 ; CHECK: codeLenInByte = 20
 define amdgpu_kernel void @code_size_inline_asm_2_inst_extra_newline(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "
     v_nop_e64

     v_nop_e64
    ", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_0_inst:
 ; CHECK: codeLenInByte = 4
 define amdgpu_kernel void @code_size_inline_asm_0_inst(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment:
 ; CHECK: codeLenInByte = 4
 define amdgpu_kernel void @code_size_inline_asm_1_comment(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "; comment", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_newline_1_comment:
 ; CHECK: codeLenInByte = 4
 define amdgpu_kernel void @code_size_inline_asm_newline_1_comment(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "
 ; comment", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment_newline:
 ; CHECK: codeLenInByte = 4
 define amdgpu_kernel void @code_size_inline_asm_1_comment_newline(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "; comment
 ", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line:
 ; CHECK: codeLenInByte = 4
 define amdgpu_kernel void @code_size_inline_asm_2_comments_line(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "; first comment ; second comment", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line_nospace:
 ; CHECK: codeLenInByte = 4
 define amdgpu_kernel void @code_size_inline_asm_2_comments_line_nospace(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "; first comment;second comment", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments0:
 ; CHECK: codeLenInByte = 20
 define amdgpu_kernel void @code_size_inline_asm_mixed_comments0(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "; comment
     v_nop_e64 ; inline comment
 ; separate comment
     v_nop_e64

     ; trailing comment
     ; extra comment
   ", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments1:
 ; CHECK: codeLenInByte = 20
 define amdgpu_kernel void @code_size_inline_asm_mixed_comments1(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "v_nop_e64 ; inline comment
 ; separate comment
     v_nop_e64

     ; trailing comment
     ; extra comment
   ", ""()
   ret void
 }

 ; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments_operands:
 ; CHECK: codeLenInByte = 20
 define amdgpu_kernel void @code_size_inline_asm_mixed_comments_operands(ptr addrspace(1) %out) {
 entry:
   call void asm sideeffect "; comment
     v_add_i32_e32 v0, vcc, v1, v2 ; inline comment
 ; separate comment
     v_bfrev_b32_e32 v0, 1

     ; trailing comment
     ; extra comment
   ", ""()
   ret void
 }

 ; FIXME: Should not have intermediate sgprs
 ; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
 ; CHECK: v_mov_b32_e32 v0, 0x1e240
 ; CHECK: v_mov_b32_e32 v1, 0
 ; CHECK: use v[0:1]
 define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
 entry:
   call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
   ret void
 }

 ; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr:
 ; CHECK: v_mov_b32_e32 v0, 1{{$}}
 ; CHECK: ; use v0
 define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
 entry:
   call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
   ret void
 }


 ; FIXME: This behavior is nonsense. We should probably disallow i1 asm

 ; CHECK-LABEL: {{^}}i1_input_phys_vgpr:
 ; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]]
 ; CHECK-NOT: [[LOAD]]
 ; CHECK: ; use v0
 ; CHECK: v_and_b32_e32 [[STORE:v[0-9]+]], 1, v1
 ; CHECK: {{buffer|flat}}_store_byte [[STORE]],
 define amdgpu_kernel void @i1_input_phys_vgpr() {
 entry:
   %val = load i1, ptr addrspace(1) undef
   %cc = call i1 asm sideeffect "; use $1, def $0 ", "={v1}, {v0}"(i1 %val)
   store i1 %cc, ptr addrspace(1) undef
   ret void
 }

 ; FIXME: Should prodbably be masking high bits of load.
 ; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2:
 ; CHECK: buffer_load_ubyte v0
 ; CHECK-NEXT: s_waitcnt
 ; CHECK-NEXT: buffer_load_ubyte v1
 ; CHECK-NEXT: s_waitcnt
 ; CHECK-NEXT: ASMSTART
 define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
 entry:
   %val0 = load volatile i1, ptr addrspace(1) undef
   %val1 = load volatile i1, ptr addrspace(1) undef
   call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
   ret void
 }

 ; CHECK-LABEL: {{^}}muliple_def_phys_vgpr:
 ; CHECK: ; def v0
 ; CHECK: v_mov_b32_e32 v1, v0
 ; CHECK: ; def v0
 ; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
 define amdgpu_kernel void @muliple_def_phys_vgpr() {
 entry:
   %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
   %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
   %add = shl i32 %def0, %def1
   store i32 %add, ptr addrspace(1) undef
   ret void
 }

 ; CHECK-LABEL: {{^}}asm_constraint_c_n:
 ; CHECK: s_trap 10{{$}}
 define amdgpu_kernel void @asm_constraint_c_n()  {
 entry:
   tail call void asm sideeffect "s_trap ${0:c}", "n"(i32 10) #1
   ret void
 }

 ; CHECK-LABEL: {{^}}asm_constraint_n_n:
 ; CHECK: s_trap -10{{$}}
 define amdgpu_kernel void @asm_constraint_n_n()  {
 entry:
   tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
   ret void
 }

 ; Make sure tuples of 3 SGPRs are printed with the [] syntax instead
 ; of the tablegen default.
 ; CHECK-LABEL: {{^}}sgpr96_name_format:
 ; CHECK: ; sgpr96 s[0:2]
 define amdgpu_kernel void @sgpr96_name_format()  {
 entry:
   tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
   ret void
 }

 ; Check aggregate types are handled properly.
 ; CHECK-LABEL: mad_u64
 ; CHECK: v_mad_u64_u32
 define void @mad_u64(i32 %x, i1 %c0) {
 entry:
   br i1 %c0, label %exit, label %false

 false:
   %s0 = tail call { i64, i64 } asm sideeffect "v_mad_u64_u32 $0, $1, $2, $3, $4", "=v,=s,v,v,v"(i32 -766435501, i32 %x, i64 0)
   br label %exit

 exit:
   %s1 = phi { i64, i64} [ poison, %entry ], [ %s0, %false]
   %v0 = extractvalue { i64, i64 } %s1, 0
   %v1 = extractvalue { i64, i64 } %s1, 1
   tail call void asm sideeffect "; use $0", "v"(i64 %v0)
   tail call void asm sideeffect "; use $0", "v"(i64 %v1)
   ret void
 }

 ; CHECK-LABEL: {{^}}scc_as_i32:
 ; CHECK: ; def scc
 ; CHECK: ; use scc
 define void @scc_as_i32() {
   %scc = call i32 asm sideeffect "; def $0", "={scc}"()
   call void asm sideeffect "; use $0 ", "{scc}"(i32 %scc)
   ret void
 }

 ; CHECK-LABEL: {{^}}scc_as_i1:
 ; CHECK: ; def scc
 ; CHECK: ; use scc
 define void @scc_as_i1() {
   %scc = call i1 asm sideeffect "; def $0", "={scc}"()
   call void asm sideeffect "; use $0 ", "{scc}"(i1 %scc)
   ret void
 }

 ; Make sure the SGPR def is treated as a uniform value when the inline
 ; assembly also defines a divergent value. The add should be scalar
 ; and not introduce illegal vgpr to sgpr copies.
 ; CHECK-LABEL: {{^}}mixed_def_vgpr_sgpr_def_asm:
 ; CHECK: ; def v0 s[4:5]
 ; CHECK: s_add_u32
 ; CHECK-NEXT: s_addc_u32
 ; CHECK: ; use s[4:5]
 define void @mixed_def_vgpr_sgpr_def_asm() {
   %vgpr_sgpr = call { i32, i64 } asm sideeffect "; def $0 $1 ", "=v,={s[4:5]}"()
   %vgpr = extractvalue { i32, i64 } %vgpr_sgpr, 0
   %sgpr = extractvalue { i32, i64 } %vgpr_sgpr, 1
   %sgpr.add = add i64 %sgpr, 2
   call void asm sideeffect "; use $0 ", "{s[4:5]}"(i64 %sgpr.add)
   ret void
 }

 ; CHECK-LABEL: {{^}}mixed_def_sgpr_vgpr_def_asm:
 ; CHECK: ; def s[4:5] v0
 ; CHECK: s_add_u32
 ; CHECK-NEXT: s_addc_u32
 ; CHECK: ; use s[4:5]
 define void @mixed_def_sgpr_vgpr_def_asm() {
   %sgpr_vgpr = call { i64, i32 } asm sideeffect "; def $0 $1 ", "={s[4:5]},=v"()
   %sgpr = extractvalue { i64, i32 } %sgpr_vgpr, 0
   %vgpr = extractvalue { i64, i32 } %sgpr_vgpr, 1
   %sgpr.add = add i64 %sgpr, 2
   call void asm sideeffect "; use $0 ", "{s[4:5]}"(i64 %sgpr.add)
   ret void
 }
	; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s \| FileCheck --check-prefix=CHECK %s
	; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck --check-prefix=CHECK %s

	; CHECK-LABEL: {{^}}inline_asm:
	; CHECK: s_endpgm
	; CHECK: s_endpgm
	define amdgpu_kernel void @inline_asm(ptr addrspace(1) %out) {
	entry:
	store i32 5, ptr addrspace(1) %out
	call void asm sideeffect "s_endpgm", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}inline_asm_shader:
	; CHECK: s_endpgm
	; CHECK: s_endpgm
	define amdgpu_ps void @inline_asm_shader() {
	entry:
	call void asm sideeffect "s_endpgm", ""()
	ret void
	}


	; CHECK-LABEL: {{^}}branch_on_asm_vgpr:
	; Make sure VGPR inline assembly is treated as divergent.
	; CHECK: v_mov_b32 v{{[0-9]+}}, 0
	; CHECK: v_cmp_eq_u32
	; CHECK: s_and_saveexec_b64
	define amdgpu_kernel void @branch_on_asm_vgpr(ptr addrspace(1) %out) {
	%zero = call i32 asm "v_mov_b32 $0, 0", "=v"()
	%cmp = icmp eq i32 %zero, 0
	br i1 %cmp, label %if, label %endif

	if:
	store i32 0, ptr addrspace(1) %out
	br label %endif

	endif:
	ret void
	}

	; CHECK-LABEL: {{^}}branch_on_asm_sgpr:
	; Make sure SGPR inline assembly is treated as uniform
	; CHECK: s_mov_b32 s{{[0-9]+}}, 0
	; CHECK: s_cmp_lg_u32
	; CHECK: s_cbranch_scc0
	define amdgpu_kernel void @branch_on_asm_sgpr(ptr addrspace(1) %out) {
	%zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
	%cmp = icmp eq i32 %zero, 0
	br i1 %cmp, label %if, label %endif

	if:
	store i32 0, ptr addrspace(1) %out
	br label %endif

	endif:
	ret void
	}

	; CHECK-LABEL: {{^}}v_cmp_asm:
	; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
	; CHECK: v_cmp_ne_u32_e64 s[[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]], 0, [[SRC]]
	; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
	; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
	; CHECK: buffer_store_dwordx2 v[[[V_LO]]:[[V_HI]]]
	define amdgpu_kernel void @v_cmp_asm(ptr addrspace(1) %out, i32 %in) {
	%sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
	store i64 %sgpr, ptr addrspace(1) %out
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm:
	; CHECK: codeLenInByte = 12
	define amdgpu_kernel void @code_size_inline_asm(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "v_nop_e64", ""()
	ret void
	}

	; All inlineasm instructions are assumed to be the maximum size
	; CHECK-LABEL: {{^}}code_size_inline_asm_small_inst:
	; CHECK: codeLenInByte = 12
	define amdgpu_kernel void @code_size_inline_asm_small_inst(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "v_nop_e32", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst:
	; CHECK: codeLenInByte = 20
	define amdgpu_kernel void @code_size_inline_asm_2_inst(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "
	v_nop_e64
	v_nop_e64
	", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst_extra_newline:
	; CHECK: codeLenInByte = 20
	define amdgpu_kernel void @code_size_inline_asm_2_inst_extra_newline(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "
	v_nop_e64

	v_nop_e64
	", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_0_inst:
	; CHECK: codeLenInByte = 4
	define amdgpu_kernel void @code_size_inline_asm_0_inst(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment:
	; CHECK: codeLenInByte = 4
	define amdgpu_kernel void @code_size_inline_asm_1_comment(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "; comment", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_newline_1_comment:
	; CHECK: codeLenInByte = 4
	define amdgpu_kernel void @code_size_inline_asm_newline_1_comment(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "
	; comment", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment_newline:
	; CHECK: codeLenInByte = 4
	define amdgpu_kernel void @code_size_inline_asm_1_comment_newline(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "; comment
	", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line:
	; CHECK: codeLenInByte = 4
	define amdgpu_kernel void @code_size_inline_asm_2_comments_line(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "; first comment ; second comment", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line_nospace:
	; CHECK: codeLenInByte = 4
	define amdgpu_kernel void @code_size_inline_asm_2_comments_line_nospace(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "; first comment;second comment", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments0:
	; CHECK: codeLenInByte = 20
	define amdgpu_kernel void @code_size_inline_asm_mixed_comments0(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "; comment
	v_nop_e64 ; inline comment
	; separate comment
	v_nop_e64

	; trailing comment
	; extra comment
	", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments1:
	; CHECK: codeLenInByte = 20
	define amdgpu_kernel void @code_size_inline_asm_mixed_comments1(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "v_nop_e64 ; inline comment
	; separate comment
	v_nop_e64

	; trailing comment
	; extra comment
	", ""()
	ret void
	}

	; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments_operands:
	; CHECK: codeLenInByte = 20
	define amdgpu_kernel void @code_size_inline_asm_mixed_comments_operands(ptr addrspace(1) %out) {
	entry:
	call void asm sideeffect "; comment
	v_add_i32_e32 v0, vcc, v1, v2 ; inline comment
	; separate comment
	v_bfrev_b32_e32 v0, 1

	; trailing comment
	; extra comment
	", ""()
	ret void
	}

	; FIXME: Should not have intermediate sgprs
	; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
	; CHECK: v_mov_b32_e32 v0, 0x1e240
	; CHECK: v_mov_b32_e32 v1, 0
	; CHECK: use v[0:1]
	define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
	entry:
	call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
	ret void
	}

	; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr:
	; CHECK: v_mov_b32_e32 v0, 1{{$}}
	; CHECK: ; use v0
	define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
	entry:
	call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
	ret void
	}


	; FIXME: This behavior is nonsense. We should probably disallow i1 asm

	; CHECK-LABEL: {{^}}i1_input_phys_vgpr:
	; CHECK: {{buffer\|flat}}_load_ubyte [[LOAD:v[0-9]+]]
	; CHECK-NOT: [[LOAD]]
	; CHECK: ; use v0
	; CHECK: v_and_b32_e32 [[STORE:v[0-9]+]], 1, v1
	; CHECK: {{buffer\|flat}}_store_byte [[STORE]],
	define amdgpu_kernel void @i1_input_phys_vgpr() {
	entry:
	%val = load i1, ptr addrspace(1) undef
	%cc = call i1 asm sideeffect "; use $1, def $0 ", "={v1}, {v0}"(i1 %val)
	store i1 %cc, ptr addrspace(1) undef
	ret void
	}

	; FIXME: Should prodbably be masking high bits of load.
	; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2:
	; CHECK: buffer_load_ubyte v0
	; CHECK-NEXT: s_waitcnt
	; CHECK-NEXT: buffer_load_ubyte v1
	; CHECK-NEXT: s_waitcnt
	; CHECK-NEXT: ASMSTART
	define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
	entry:
	%val0 = load volatile i1, ptr addrspace(1) undef
	%val1 = load volatile i1, ptr addrspace(1) undef
	call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
	ret void
	}

	; CHECK-LABEL: {{^}}muliple_def_phys_vgpr:
	; CHECK: ; def v0
	; CHECK: v_mov_b32_e32 v1, v0
	; CHECK: ; def v0
	; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
	define amdgpu_kernel void @muliple_def_phys_vgpr() {
	entry:
	%def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
	%def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
	%add = shl i32 %def0, %def1
	store i32 %add, ptr addrspace(1) undef
	ret void
	}

	; CHECK-LABEL: {{^}}asm_constraint_c_n:
	; CHECK: s_trap 10{{$}}
	define amdgpu_kernel void @asm_constraint_c_n() {
	entry:
	tail call void asm sideeffect "s_trap ${0:c}", "n"(i32 10) #1
	ret void
	}

	; CHECK-LABEL: {{^}}asm_constraint_n_n:
	; CHECK: s_trap -10{{$}}
	define amdgpu_kernel void @asm_constraint_n_n() {
	entry:
	tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
	ret void
	}

	; Make sure tuples of 3 SGPRs are printed with the [] syntax instead
	; of the tablegen default.
	; CHECK-LABEL: {{^}}sgpr96_name_format:
	; CHECK: ; sgpr96 s[0:2]
	define amdgpu_kernel void @sgpr96_name_format() {
	entry:
	tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
	ret void
	}

	; Check aggregate types are handled properly.
	; CHECK-LABEL: mad_u64
	; CHECK: v_mad_u64_u32
	define void @mad_u64(i32 %x, i1 %c0) {
	entry:
	br i1 %c0, label %exit, label %false

	false:
	%s0 = tail call { i64, i64 } asm sideeffect "v_mad_u64_u32 $0, $1, $2, $3, $4", "=v,=s,v,v,v"(i32 -766435501, i32 %x, i64 0)
	br label %exit

	exit:
	%s1 = phi { i64, i64} [ poison, %entry ], [ %s0, %false]
	%v0 = extractvalue { i64, i64 } %s1, 0
	%v1 = extractvalue { i64, i64 } %s1, 1
	tail call void asm sideeffect "; use $0", "v"(i64 %v0)
	tail call void asm sideeffect "; use $0", "v"(i64 %v1)
	ret void
	}

	; CHECK-LABEL: {{^}}scc_as_i32:
	; CHECK: ; def scc
	; CHECK: ; use scc
	define void @scc_as_i32() {
	%scc = call i32 asm sideeffect "; def $0", "={scc}"()
	call void asm sideeffect "; use $0 ", "{scc}"(i32 %scc)
	ret void
	}

	; CHECK-LABEL: {{^}}scc_as_i1:
	; CHECK: ; def scc
	; CHECK: ; use scc
	define void @scc_as_i1() {
	%scc = call i1 asm sideeffect "; def $0", "={scc}"()
	call void asm sideeffect "; use $0 ", "{scc}"(i1 %scc)
	ret void
	}

	; Make sure the SGPR def is treated as a uniform value when the inline
	; assembly also defines a divergent value. The add should be scalar
	; and not introduce illegal vgpr to sgpr copies.
	; CHECK-LABEL: {{^}}mixed_def_vgpr_sgpr_def_asm:
	; CHECK: ; def v0 s[4:5]
	; CHECK: s_add_u32
	; CHECK-NEXT: s_addc_u32
	; CHECK: ; use s[4:5]
	define void @mixed_def_vgpr_sgpr_def_asm() {
	%vgpr_sgpr = call { i32, i64 } asm sideeffect "; def $0 $1 ", "=v,={s[4:5]}"()
	%vgpr = extractvalue { i32, i64 } %vgpr_sgpr, 0
	%sgpr = extractvalue { i32, i64 } %vgpr_sgpr, 1
	%sgpr.add = add i64 %sgpr, 2
	call void asm sideeffect "; use $0 ", "{s[4:5]}"(i64 %sgpr.add)
	ret void
	}

	; CHECK-LABEL: {{^}}mixed_def_sgpr_vgpr_def_asm:
	; CHECK: ; def s[4:5] v0
	; CHECK: s_add_u32
	; CHECK-NEXT: s_addc_u32
	; CHECK: ; use s[4:5]
	define void @mixed_def_sgpr_vgpr_def_asm() {
	%sgpr_vgpr = call { i64, i32 } asm sideeffect "; def $0 $1 ", "={s[4:5]},=v"()
	%sgpr = extractvalue { i64, i32 } %sgpr_vgpr, 0
	%vgpr = extractvalue { i64, i32 } %sgpr_vgpr, 1
	%sgpr.add = add i64 %sgpr, 2
	call void asm sideeffect "; use $0 ", "{s[4:5]}"(i64 %sgpr.add)
	ret void
	}