| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -disable-separate-const-offset-from-gep=1 -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX942,GFX942_PTRADD %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -disable-separate-const-offset-from-gep=1 -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX942,GFX942_LEGACY %s |
| |
| ; Tests for DAG combines and folds related to the ISD::PTRADD SelectionDAG |
| ; opcode. The RUN lines use -disable-separate-const-offset-from-gep to disable |
| ; similar transformations in that pass. |
| |
| ; Tests reassociation (ptradd N0:(ptradd p, c1), z) where N0 has only one use. |
| ; Here p is %base, c1 is the constant 24 and z is %voffset. As the function |
| ; name says, z has two uses: it is the offset of %gep1 and an operand of the |
| ; final add. Both llc configurations share the same checks: %base and %voffset |
| ; are added first and the constant 24 becomes the immediate offset of the load. |
| define i64 @global_load_ZTwoUses(ptr addrspace(1) %base, i64 %voffset) { |
| ; GFX942-LABEL: global_load_ZTwoUses: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:24 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %base, i64 24 |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %voffset |
| %l = load i64, ptr addrspace(1) %gep1, align 8 |
| %r = add i64 %l, %voffset |
| ret i64 %r |
| } |
| |
| ; Tests an index of the form (add nuw nsw %voffset, 24) feeding a single |
| ; "nuw inbounds" GEP. Both llc configurations are expected to fold the constant |
| ; 24 into the immediate offset of the load and to emit one 64-bit add of %base |
| ; and %voffset; the two sets of checks differ only in the operand order of |
| ; that add. |
| define i64 @global_load_gep_add_reassoc(ptr addrspace(1) %base, i64 %voffset) { |
| ; GFX942_PTRADD-LABEL: global_load_gep_add_reassoc: |
| ; GFX942_PTRADD: ; %bb.0: |
| ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942_PTRADD-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:24 |
| ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942_LEGACY-LABEL: global_load_gep_add_reassoc: |
| ; GFX942_LEGACY: ; %bb.0: |
| ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1] |
| ; GFX942_LEGACY-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:24 |
| ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31] |
| %add0 = add nuw nsw i64 %voffset, 24 |
| %gep0 = getelementptr nuw inbounds i8, ptr addrspace(1) %base, i64 %add0 |
| %l = load i64, ptr addrspace(1) %gep0, align 8 |
| ret i64 %l |
| } |
| |
| ; Tests reassociation (ptradd (ptradd p, c1), c2) with two constants. These |
| ; would be folded away in most cases, but the index computation introduced by |
| ; the legalization of wide vector stores can for example introduce them. |
| ; The shared checks expect the <16 x i32> store to be split into four |
| ; global_store_dwordx4 instructions that all use the same base s[0:1] with |
| ; immediate offsets 48, 32, 16 and 0, i.e. without separate address arithmetic. |
| define amdgpu_kernel void @store_v16i32(ptr addrspace(1) %out, <16 x i32> %a) { |
| ; GFX942-LABEL: store_v16i32: |
| ; GFX942: ; %bb.0: ; %entry |
| ; GFX942-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 |
| ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; GFX942-NEXT: v_mov_b32_e32 v4, 0 |
| ; GFX942-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX942-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX942-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX942-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:48 |
| ; GFX942-NEXT: s_nop 1 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX942-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX942-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX942-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:32 |
| ; GFX942-NEXT: s_nop 1 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s12 |
| ; GFX942-NEXT: v_mov_b32_e32 v1, s13 |
| ; GFX942-NEXT: v_mov_b32_e32 v2, s14 |
| ; GFX942-NEXT: v_mov_b32_e32 v3, s15 |
| ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 |
| ; GFX942-NEXT: s_nop 1 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s8 |
| ; GFX942-NEXT: v_mov_b32_e32 v1, s9 |
| ; GFX942-NEXT: v_mov_b32_e32 v2, s10 |
| ; GFX942-NEXT: v_mov_b32_e32 v3, s11 |
| ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] |
| ; GFX942-NEXT: s_endpgm |
| entry: |
| store <16 x i32> %a, ptr addrspace(1) %out |
| ret void |
| } |
| |
| |
| ; Tests the (ptradd 0, x) -> x DAG combine. |
| ; The GEP has a null base, so the shared checks expect the byte store to use |
| ; the %offset register pair v[0:1] directly as its address, with no add. |
| define void @baseptr_null(i64 %offset, i8 %v) { |
| ; GFX942-LABEL: baseptr_null: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: flat_store_byte v[0:1], v2 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| %gep = getelementptr i8, ptr null, i64 %offset |
| store i8 %v, ptr %gep, align 1 |
| ret void |
| } |
| |
| ; Taken from implicit-kernarg-backend-usage.ll, tests the PTRADD handling in the |
| ; assertalign DAG combine. |
| ; The kernel does one volatile byte load through each of the queue, implicitarg |
| ; and dispatch pointers and then stores the dispatch id to %ptr. In the shared |
| ; checks the second byte load is addressed as s[4:5] with offset:8, the same |
| ; base register pair from which the kernel argument is loaded at 0x0. |
| ; NOTE(review): attribute group #0 is not defined in the visible part of this |
| ; file - confirm that it is defined further down. |
| define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) #0 { |
| ; GFX942-LABEL: llvm_amdgcn_queue_ptr: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX942-NEXT: global_load_ubyte v0, v2, s[2:3] sc0 sc1 |
| ; GFX942-NEXT: global_load_ubyte v0, v2, s[4:5] offset:8 sc0 sc1 |
| ; GFX942-NEXT: global_load_ubyte v0, v2, s[0:1] sc0 sc1 |
| ; GFX942-NEXT: ; kill: killed $sgpr0_sgpr1 |
| ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] |
| ; GFX942-NEXT: ; kill: killed $sgpr2_sgpr3 |
| ; GFX942-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_endpgm |
| %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr() |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| %dispatch.id = call i64 @llvm.amdgcn.dispatch.id() |
| %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr |
| %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr |
| %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr |
| store volatile i64 %dispatch.id, ptr addrspace(1) %ptr |
| ret void |
| } |
| |
| ; Taken from memcpy-param-combinations.ll, tests PTRADD handling in |
| ; SelectionDAGAddressAnalysis. |
| ; The memcpy copies 16 bytes from an addrspace(4) source to an addrspace(1) |
| ; destination, both with align 1. The shared checks expect it to be lowered to |
| ; a single global_load_dwordx4 followed by a single global_store_dwordx4. |
| define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; GFX942-LABEL: memcpy_p1_p4_sz16_align_1_1: |
| ; GFX942: ; %bb.0: ; %entry |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Test skipping the lower-32-bit addition if it is unnecessary. |
| ; The constant offset is 0x100000000, whose low 32 bits are all zero, so only |
| ; the high half of the pointer (v1) needs to change. With |
| ; -amdgpu-use-sdag-ptradd=0 the checks expect exactly that: one 32-bit add of 1 |
| ; to v1. With -amdgpu-use-sdag-ptradd=1 the checks currently still expect the |
| ; constant to be materialized in s[0:1] and added with a full 64-bit |
| ; v_lshl_add_u64. |
| define ptr @huge_offset_low_32_unused(ptr %p) { |
| ; GFX942_PTRADD-LABEL: huge_offset_low_32_unused: |
| ; GFX942_PTRADD: ; %bb.0: |
| ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942_PTRADD-NEXT: s_mov_b32 s0, 0 |
| ; GFX942_PTRADD-NEXT: s_mov_b32 s1, 1 |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] |
| ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942_LEGACY-LABEL: huge_offset_low_32_unused: |
| ; GFX942_LEGACY: ; %bb.0: |
| ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942_LEGACY-NEXT: v_add_u32_e32 v1, 1, v1 |
| ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31] |
| %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000 |
| ret ptr %gep |
| } |
| |
| ; Reassociate address computation if it leads to more scalar operations. |
| ; The GEP offset is (%voffset + %soffset), where %voffset is the zero-extended |
| ; workitem id and %soffset is a kernel argument; in this variant %soffset is |
| ; the right-hand operand of the add. With -amdgpu-use-sdag-ptradd=0 the checks |
| ; expect %p + %soffset to be computed with s_add_u32/s_addc_u32, followed by a |
| ; single v_lshl_add_u64 that adds the workitem id. With |
| ; -amdgpu-use-sdag-ptradd=1 the checks currently expect two v_lshl_add_u64 |
| ; instructions and no scalar add. |
| define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) { |
| ; GFX942_PTRADD-LABEL: reassoc_scalar_r: |
| ; GFX942_PTRADD: ; %bb.0: ; %entry |
| ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 |
| ; GFX942_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7] |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3] |
| ; GFX942_PTRADD-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] |
| ; GFX942_PTRADD-NEXT: s_endpgm |
| ; |
| ; GFX942_LEGACY-LABEL: reassoc_scalar_r: |
| ; GFX942_LEGACY: ; %bb.0: ; %entry |
| ; GFX942_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 |
| ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942_LEGACY-NEXT: s_add_u32 s2, s2, s6 |
| ; GFX942_LEGACY-NEXT: s_addc_u32 s3, s3, s7 |
| ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1] |
| ; GFX942_LEGACY-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] |
| ; GFX942_LEGACY-NEXT: s_endpgm |
| entry: |
| %voffset32 = call i32 @llvm.amdgcn.workitem.id.x() |
| %voffset = zext i32 %voffset32 to i64 |
| %offset = add nuw nsw i64 %voffset, %soffset |
| %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset |
| store ptr addrspace(1) %gep, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Variant of the scalar reassociation test in which the kernel-argument offset |
| ; %soffset is the left-hand operand of the add and the zero-extended workitem |
| ; id is the right-hand operand. With -amdgpu-use-sdag-ptradd=0 the checks |
| ; expect %p + %soffset to be computed with s_add_u32/s_addc_u32, followed by a |
| ; single v_lshl_add_u64. With -amdgpu-use-sdag-ptradd=1 the checks currently |
| ; expect two v_lshl_add_u64 instructions and no scalar add. |
| define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) { |
| ; GFX942_PTRADD-LABEL: reassoc_scalar_l: |
| ; GFX942_PTRADD: ; %bb.0: ; %entry |
| ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 |
| ; GFX942_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1] |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3] |
| ; GFX942_PTRADD-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] |
| ; GFX942_PTRADD-NEXT: s_endpgm |
| ; |
| ; GFX942_LEGACY-LABEL: reassoc_scalar_l: |
| ; GFX942_LEGACY: ; %bb.0: ; %entry |
| ; GFX942_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 |
| ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942_LEGACY-NEXT: s_add_u32 s2, s2, s6 |
| ; GFX942_LEGACY-NEXT: s_addc_u32 s3, s3, s7 |
| ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1] |
| ; GFX942_LEGACY-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] |
| ; GFX942_LEGACY-NEXT: s_endpgm |
| entry: |
| %voffset32 = call i32 @llvm.amdgcn.workitem.id.x() |
| %voffset = zext i32 %voffset32 to i64 |
| %offset = add nuw nsw i64 %soffset, %voffset |
| %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset |
| store ptr addrspace(1) %gep, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Tests the target-specific (ptradd x, shl(0 - y, k)) -> sub(x, shl(y, k)) fold. |
| ; Here x is %p, y is %noffset and k is %shift. With -amdgpu-use-sdag-ptradd=0 |
| ; the checks show the folded form: shift %noffset, then a 64-bit subtract from |
| ; %p. With -amdgpu-use-sdag-ptradd=1 the checks currently expect the unfolded |
| ; sequence: negate %noffset, shift it, then add the result to %p. |
| define ptr addrspace(1) @shl_neg_offset(ptr addrspace(1) %p, i64 %noffset, i64 %shift) { |
| ; GFX942_PTRADD-LABEL: shl_neg_offset: |
| ; GFX942_PTRADD: ; %bb.0: |
| ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942_PTRADD-NEXT: v_sub_co_u32_e32 v2, vcc, 0, v2 |
| ; GFX942_PTRADD-NEXT: s_nop 1 |
| ; GFX942_PTRADD-NEXT: v_subb_co_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942_LEGACY-LABEL: shl_neg_offset: |
| ; GFX942_LEGACY: ; %bb.0: |
| ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] |
| ; GFX942_LEGACY-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942_LEGACY-NEXT: s_nop 1 |
| ; GFX942_LEGACY-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31] |
| %offset = sub i64 0, %noffset |
| %x = shl i64 %offset, %shift |
| %gep = getelementptr inbounds i8, ptr addrspace(1) %p, i64 %x |
| ret ptr addrspace(1) %gep |
| } |
| |
| ; Struct used by complextype_global_gep. Field 1, the [10 x i8] array, follows |
| ; the i64 and therefore starts at byte offset 8. |
| %complextype = type { i64, [10 x i8], float } |
| |
| ; Zero-initialized global in addrspace(1) that serves as the GEP base below. |
| @v0 = dso_local addrspace(1) global %complextype zeroinitializer |
| |
| ; Check that offsets are folded into global addresses if possible. For example, |
| ; this is relevant when using --amdgpu-lower-module-lds-strategy=table. |
| ; The returned address is @v0 + 8 (start of field 1) + %offset + 2, i.e. a |
| ; total constant offset of 10. With -amdgpu-use-sdag-ptradd=0 the checks show |
| ; that constant folded into the relocation addends, which are 10 larger than |
| ; in the other configuration (lo+14/hi+22 versus lo+4/hi+12). With |
| ; -amdgpu-use-sdag-ptradd=1 the checks currently expect a separate |
| ; v_lshl_add_u64 that adds the constant 10 instead. |
| define ptr addrspace(1) @complextype_global_gep(i64 %offset) { |
| ; GFX942_PTRADD-LABEL: complextype_global_gep: |
| ; GFX942_PTRADD: ; %bb.0: |
| ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1] |
| ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, v0@rel32@lo+4 |
| ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, v0@rel32@hi+12 |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 10 |
| ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942_LEGACY-LABEL: complextype_global_gep: |
| ; GFX942_LEGACY: ; %bb.0: |
| ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942_LEGACY-NEXT: s_getpc_b64 s[0:1] |
| ; GFX942_LEGACY-NEXT: s_add_u32 s0, s0, v0@rel32@lo+14 |
| ; GFX942_LEGACY-NEXT: s_addc_u32 s1, s1, v0@rel32@hi+22 |
| ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] |
| ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31] |
| %gep0 = getelementptr inbounds %complextype, ptr addrspace(1) @v0, i64 0, i32 1, i64 %offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2 |
| ret ptr addrspace(1) %gep1 |
| } |
| |
| ; Packed struct (float followed by double, no padding), so one element is 12 |
| ; bytes; this is the multiplier that appears in the fold_mad64 checks. |
| %S = type <{ float, double }> |
| |
| ; Tests the tryFoldToMad64_32 PTRADD combine. |
| ; The store address is %p + 12 * zext(workitem id). With |
| ; -amdgpu-use-sdag-ptradd=0 the checks expect a single v_mad_u64_u32 that |
| ; multiplies by 12 and adds the base in s[0:1]. With -amdgpu-use-sdag-ptradd=1 |
| ; the checks currently expect a separate 24-bit multiply pair |
| ; (v_mul_hi_u32_u24 / v_mul_u32_u24) followed by a v_lshl_add_u64. |
| define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) { |
| ; GFX942_PTRADD-LABEL: fold_mad64: |
| ; GFX942_PTRADD: ; %bb.0: |
| ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX942_PTRADD-NEXT: v_mul_hi_u32_u24_e32 v1, 12, v0 |
| ; GFX942_PTRADD-NEXT: v_mul_u32_u24_e32 v0, 12, v0 |
| ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX942_PTRADD-NEXT: s_endpgm |
| ; |
| ; GFX942_LEGACY-LABEL: fold_mad64: |
| ; GFX942_LEGACY: ; %bb.0: |
| ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 1.0 |
| ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942_LEGACY-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1] |
| ; GFX942_LEGACY-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX942_LEGACY-NEXT: s_endpgm |
| %voffset32 = call i32 @llvm.amdgcn.workitem.id.x() |
| %voffset = zext i32 %voffset32 to i64 |
| %p1 = getelementptr inbounds %S, ptr addrspace(1) %p, i64 %voffset, i32 0 |
| store float 1.0, ptr addrspace(1) %p1 |
| ret void |
| } |