| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s |
| |
| ; Test address space inference through ptrtoint -> bitwise op -> inttoptr |
| ; patterns, where the bits changed by the operation must fall within the |
| ; mask of address bits preserved by the addrspacecast. |
| ; For local and private memory, AMDGPU uses 2^32-aligned apertures, so only |
| ; the lower 32 bits are safe to modify. |
| ; For global and constant memory, the flat address is identical to the |
| ; original address, so all 64 bits are preserved. |
| |
| ; Local (shared) memory tests - addrspace(3) |
| |
| ; XOR with 4095 (0xFFF) flips only the low 12 bits of the flat address derived |
| ; from a local (addrspace(3)) pointer, i.e. it stays within the low 32 bits. |
| ; NOTE(review): the CHECK lines expect the IR unchanged (the store still uses |
| ; the flat pointer); confirm this baseline is intended. |
| define void @test_xor_local(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_xor_local( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4095 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 4095 = 0xFFF - only bits 0-11 are flipped |
| %b = xor i64 %a, 4095 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Boundary case for local memory: XOR with 0xFFFFFFFF flips every one of the |
| ; low 32 bits and nothing above them. |
| ; NOTE(review): the inline comment says this should be optimized, but the CHECK |
| ; lines expect the IR unchanged (the store still uses the flat pointer); |
| ; confirm this baseline is intended. |
| define void @test_xor_local_max32bit(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_xor_local_max32bit( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967295 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0xFFFFFFFF - maximum 32-bit value, should still be optimized |
| %b = xor i64 %a, 4294967295 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Negative test for local memory: XOR with 0x100000000 flips bit 32 |
| ; (0-indexed; this is not bit 33), the first bit above the low 32 bits. |
| ; The CHECK lines expect the IR to be left unchanged. |
| define void @test_xor_local_fail_bit32(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_xor_local_fail_bit32( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967296 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0x100000000 - bit 32 set, should NOT be optimized |
| %b = xor i64 %a, 4294967296 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; OR with 255 (0xFF) sets only the low 8 bits of the flat address derived from |
| ; a local (addrspace(3)) pointer. |
| ; NOTE(review): the CHECK lines expect the IR unchanged (the store still uses |
| ; the flat pointer); confirm this baseline is intended. |
| define void @test_or_local(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_or_local( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = or i64 [[A]], 255 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 255 = 0xFF - only bits 0-7 can be set |
| %b = or i64 %a, 255 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; AND with -4096 clears the low 12 bits of the flat address derived from a |
| ; local (addrspace(3)) pointer and keeps every bit from 12 upwards. |
| ; NOTE(review): the inline comment says this should be optimized, but the CHECK |
| ; lines expect the IR unchanged (the store still uses the flat pointer); |
| ; confirm this baseline is intended. |
| define void @test_and_local(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_and_local( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = and i64 [[A]], -4096 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; -4096 = 0xFFFFFFFFFFFFF000 - clears low 12 bits, should be optimized |
| %b = and i64 %a, -4096 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Negative test for local memory: the AND mask has bit 32 cleared, so the |
| ; operation can change a bit above the low 32 bits. |
| ; The CHECK lines expect the IR to be left unchanged. |
| define void @test_and_local_fail(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_and_local_fail( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = and i64 [[A]], -4294967297 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; -4294967297 = 0xFFFFFFFEFFFFFFFF - clears bit 32, should NOT be optimized |
| %b = and i64 %a, -4294967297 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Global memory tests - addrspace(1) |
| |
| ; XOR with 7 flips only the low 3 bits of the flat address derived from a |
| ; global (addrspace(1)) pointer. |
| ; NOTE(review): the CHECK lines expect the IR unchanged (the store still uses |
| ; the flat pointer); confirm this baseline is intended. |
| define void @test_xor_global(ptr addrspace(1) %sp) { |
| ; CHECK-LABEL: define void @test_xor_global( |
| ; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 7 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(1) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 7 = 0x7 - only bits 0-2 are flipped |
| %b = xor i64 %a, 7 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Global counterpart of the 32-bit boundary case: XOR with 0xFFFFFFFF flips |
| ; every one of the low 32 bits and nothing above them. |
| ; NOTE(review): the inline comment says this should be optimized, but the CHECK |
| ; lines expect the IR unchanged (the store still uses the flat pointer); |
| ; confirm this baseline is intended. |
| define void @test_xor_global_max32bit(ptr addrspace(1) %sp) { |
| ; CHECK-LABEL: define void @test_xor_global_max32bit( |
| ; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967295 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(1) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0xFFFFFFFF - maximum 32-bit value, should still be optimized |
| %b = xor i64 %a, 4294967295 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; For the global address space, all 64 bits are preserved because global |
| ; addresses are bit-identical in the global and flat address spaces. Global |
| ; memory regions are architecturally separate from the private/local aperture |
| ; regions, which use specific high-address ranges that do not overlap with |
| ; valid global addresses. |
| ; This test XORs with 0x100000000, which flips bit 32 (0-indexed; this is not |
| ; bit 33). |
| ; NOTE(review): the inline comment says this should be optimized, but the CHECK |
| ; lines expect the IR unchanged (the store still uses the flat pointer); |
| ; confirm this baseline is intended. |
| define void @test_xor_global_bit32(ptr addrspace(1) %sp) { |
| ; CHECK-LABEL: define void @test_xor_global_bit32( |
| ; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967296 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(1) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0x100000000 - bit 32 set, should be optimized for global (all 64 bits preserved) |
| %b = xor i64 %a, 4294967296 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; XOR with 0x8000000000000000 flips only bit 63, the topmost bit of the flat |
| ; address derived from a global (addrspace(1)) pointer. |
| ; NOTE(review): the inline comment says this should be optimized, but the CHECK |
| ; lines expect the IR unchanged (the store still uses the flat pointer); |
| ; confirm this baseline is intended. |
| define void @test_xor_global_high_bits(ptr addrspace(1) %sp) { |
| ; CHECK-LABEL: define void @test_xor_global_high_bits( |
| ; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], -9223372036854775808 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(1) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0x8000000000000000 - bit 63 set, should be optimized for global |
| %b = xor i64 %a, -9223372036854775808 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Test OR on global (all 64 bits preserved): OR with 255 (0xFF) sets only the |
| ; low 8 bits of the flat address. |
| ; NOTE(review): the CHECK lines expect the IR unchanged (the store still uses |
| ; the flat pointer); confirm this baseline is intended. |
| define void @test_or_global(ptr addrspace(1) %sp) { |
| ; CHECK-LABEL: define void @test_or_global( |
| ; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = or i64 [[A]], 255 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(1) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 255 = 0xFF - only bits 0-7 can be set |
| %b = or i64 %a, 255 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Test AND on global (all 64 bits preserved, even when clearing high bits): |
| ; the mask keeps the low 48 bits and clears bits 48-63. |
| ; NOTE(review): the inline comment says this should be optimized, but the CHECK |
| ; lines expect the IR unchanged (the store still uses the flat pointer); |
| ; confirm this baseline is intended. |
| define void @test_and_global(ptr addrspace(1) %sp) { |
| ; CHECK-LABEL: define void @test_and_global( |
| ; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = and i64 [[A]], 281474976710655 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(1) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0x0000FFFFFFFFFFFF - clears upper 16 bits, should still optimize for global |
| %b = and i64 %a, 281474976710655 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Test ADD on global - ADD is not currently tracked by InferAddressSpaces, |
| ; so this will NOT be optimized (even though it could be safe for global). |
| ; The addend is 0x100000000 (bit 32). The CHECK lines expect the IR to be left |
| ; unchanged. |
| define void @test_add_global(ptr addrspace(1) %sp) { |
| ; CHECK-LABEL: define void @test_add_global( |
| ; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = add i64 [[A]], 4294967296 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(1) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; ADD is not tracked - this will NOT be optimized |
| %b = add i64 %a, 4294967296 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Private memory tests - addrspace(5) |
| ; Private aperture is 2^32-aligned, lower 32 bits are safe to modify |
| |
| ; XOR with 7 flips only the low 3 bits of the flat address derived from a |
| ; private (addrspace(5)) pointer. |
| ; NOTE(review): the CHECK lines expect the IR unchanged (the store still uses |
| ; the flat pointer); confirm this baseline is intended. |
| define void @test_xor_private(ptr addrspace(5) %sp) { |
| ; CHECK-LABEL: define void @test_xor_private( |
| ; CHECK-SAME: ptr addrspace(5) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(5) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 7 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(5) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 7 = 0x7 - only bits 0-2 are flipped |
| %b = xor i64 %a, 7 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Boundary case for private memory: XOR with 0xFFFFFFFF flips every one of the |
| ; low 32 bits and nothing above them. |
| ; NOTE(review): the inline comment says this should be optimized, but the CHECK |
| ; lines expect the IR unchanged (the store still uses the flat pointer); |
| ; confirm this baseline is intended. |
| define void @test_xor_private_max32bit(ptr addrspace(5) %sp) { |
| ; CHECK-LABEL: define void @test_xor_private_max32bit( |
| ; CHECK-SAME: ptr addrspace(5) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(5) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967295 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(5) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0xFFFFFFFF - maximum 32-bit value, should still be optimized |
| %b = xor i64 %a, 4294967295 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Negative test for private memory: XOR with 0x100000000 flips bit 32 |
| ; (0-indexed), the first bit above the low 32 bits. |
| ; The CHECK lines expect the IR to be left unchanged. |
| define void @test_xor_private_fail_bit32(ptr addrspace(5) %sp) { |
| ; CHECK-LABEL: define void @test_xor_private_fail_bit32( |
| ; CHECK-SAME: ptr addrspace(5) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(5) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967296 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(5) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0x100000000 - bit 32 set, should NOT be optimized |
| %b = xor i64 %a, 4294967296 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Constant memory tests - addrspace(4) |
| ; Constant address space uses same addresses as global, all 64 bits preserved |
| |
| ; XOR with 7 flips only the low 3 bits of the flat address derived from a |
| ; constant (addrspace(4)) pointer. This test uses a load instead of a store; |
| ; the loaded value %val is unused. |
| ; NOTE(review): the CHECK lines expect the IR unchanged (the load still uses |
| ; the flat pointer); confirm this baseline is intended. |
| define void @test_xor_constant(ptr addrspace(4) %sp) { |
| ; CHECK-LABEL: define void @test_xor_constant( |
| ; CHECK-SAME: ptr addrspace(4) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(4) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 7 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(4) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 7 = 0x7 - only bits 0-2 are flipped |
| %b = xor i64 %a, 7 |
| %gp2 = inttoptr i64 %b to ptr |
| %val = load i16, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; XOR with 0x8000000000000000 flips only bit 63 of the flat address derived |
| ; from a constant (addrspace(4)) pointer. The loaded value %val is unused. |
| ; NOTE(review): the inline comment says this should be optimized, but the CHECK |
| ; lines expect the IR unchanged (the load still uses the flat pointer); |
| ; confirm this baseline is intended. |
| define void @test_xor_constant_high_bits(ptr addrspace(4) %sp) { |
| ; CHECK-LABEL: define void @test_xor_constant_high_bits( |
| ; CHECK-SAME: ptr addrspace(4) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(4) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], -9223372036854775808 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(4) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 0x8000000000000000 - bit 63 set, should be optimized for constant |
| %b = xor i64 %a, -9223372036854775808 |
| %gp2 = inttoptr i64 %b to ptr |
| %val = load i16, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Test ADD operation on local - ADD is not currently tracked by |
| ; InferAddressSpaces, so this will NOT be optimized (a limitation of the pass, |
| ; not of the mask). The CHECK lines expect the IR to be left unchanged. |
| define void @test_add_local(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_add_local( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = add i64 [[A]], 64 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; ADD is not tracked - this will NOT be optimized |
| %b = add i64 %a, 64 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Test SUB operation on local - SUB is not currently tracked by |
| ; InferAddressSpaces, so this will NOT be optimized. The CHECK lines expect the |
| ; IR to be left unchanged. |
| define void @test_sub_local(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_sub_local( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = sub i64 [[A]], 128 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; SUB is not tracked - this will NOT be optimized |
| %b = sub i64 %a, 128 |
| %gp2 = inttoptr i64 %b to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Complex swizzling pattern on a local (addrspace(3)) pointer: three shifted |
| ; and masked bit fields of the address are OR-ed together and XOR-ed back into |
| ; the address. The masks (8, 4, 112) limit the XOR operand to bits 2-6 (0x7C), |
| ; so only low address bits can change. |
| ; Note that the value names are swapped relative to the operations: %and, |
| ; %and1 and %and3 hold lshr results, while %shr, %shr2 and %shr4 hold the |
| ; results of the and instructions. |
| ; NOTE(review): the CHECK lines expect the IR unchanged (the store still uses |
| ; the flat pointer); confirm this baseline is intended. |
| define void @test_swizzle_local(ptr addrspace(3) %sp) { |
| ; CHECK-LABEL: define void @test_swizzle_local( |
| ; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr |
| ; CHECK-NEXT: [[T1:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[AND:%.*]] = lshr i64 [[T1]], 8 |
| ; CHECK-NEXT: [[SHR:%.*]] = and i64 [[AND]], 8 |
| ; CHECK-NEXT: [[AND1:%.*]] = lshr i64 [[T1]], 10 |
| ; CHECK-NEXT: [[SHR2:%.*]] = and i64 [[AND1]], 4 |
| ; CHECK-NEXT: [[OR:%.*]] = or i64 [[SHR]], [[SHR2]] |
| ; CHECK-NEXT: [[AND3:%.*]] = lshr i64 [[T1]], 4 |
| ; CHECK-NEXT: [[SHR4:%.*]] = and i64 [[AND3]], 112 |
| ; CHECK-NEXT: [[OR5:%.*]] = or i64 [[OR]], [[SHR4]] |
| ; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[OR5]], [[T1]] |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[XOR]] to ptr |
| ; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(3) %sp to ptr |
| %t1 = ptrtoint ptr %gp to i64 |
| ; address bit 11 -> result bit 3 |
| %and = lshr i64 %t1, 8 |
| %shr = and i64 %and, 8 |
| ; address bit 12 -> result bit 2 |
| %and1 = lshr i64 %t1, 10 |
| %shr2 = and i64 %and1, 4 |
| %or = or i64 %shr, %shr2 |
| ; address bits 8-10 -> result bits 4-6 |
| %and3 = lshr i64 %t1, 4 |
| %shr4 = and i64 %and3, 112 |
| %or5 = or i64 %or, %shr4 |
| ; fold the gathered bits back into the original address |
| %xor = xor i64 %or5, %t1 |
| %gp2 = inttoptr i64 %xor to ptr |
| store i16 0, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Constant 32-bit memory tests - addrspace(6) |
| ; Uses 32-bit pointers, all 32 bits preserved |
| |
| ; XOR with 7 flips only the low 3 bits of the flat address derived from an |
| ; addrspace(6) pointer. The flat address is manipulated as an i64, and the |
| ; loaded value %val is unused. |
| ; NOTE(review): the CHECK lines expect the IR unchanged (the load still uses |
| ; the flat pointer); confirm this baseline is intended. |
| define void @test_xor_constant_32bit(ptr addrspace(6) %sp) { |
| ; CHECK-LABEL: define void @test_xor_constant_32bit( |
| ; CHECK-SAME: ptr addrspace(6) [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(6) [[SP]] to ptr |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64 |
| ; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 7 |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr |
| ; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[GP2]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast ptr addrspace(6) %sp to ptr |
| %a = ptrtoint ptr %gp to i64 |
| ; 7 = 0x7 - only bits 0-2 are flipped |
| %b = xor i64 %a, 7 |
| %gp2 = inttoptr i64 %b to ptr |
| %val = load i16, ptr %gp2, align 2 |
| ret void |
| } |
| |
| ; Vector pointer tests - vectors are NOT currently optimized by InferAddressSpaces |
| |
| ; Vector form of the local XOR test: both lanes of a <2 x ptr addrspace(3)> |
| ; are cast to flat, XOR-ed with 4095 (0xFFF), and lane 0 is used for the store. |
| ; The CHECK lines expect the IR to be left unchanged. |
| define void @test_xor_local_vector(<2 x ptr addrspace(3)> %sp) { |
| ; CHECK-LABEL: define void @test_xor_local_vector( |
| ; CHECK-SAME: <2 x ptr addrspace(3)> [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast <2 x ptr addrspace(3)> [[SP]] to <2 x ptr> |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint <2 x ptr> [[GP]] to <2 x i64> |
| ; CHECK-NEXT: [[B:%.*]] = xor <2 x i64> [[A]], splat (i64 4095) |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr <2 x i64> [[B]] to <2 x ptr> |
| ; CHECK-NEXT: [[EL0:%.*]] = extractelement <2 x ptr> [[GP2]], i32 0 |
| ; CHECK-NEXT: store i16 0, ptr [[EL0]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast <2 x ptr addrspace(3)> %sp to <2 x ptr> |
| %a = ptrtoint <2 x ptr> %gp to <2 x i64> |
| ; Vector case - not currently optimized by InferAddressSpaces |
| %b = xor <2 x i64> %a, splat (i64 4095) |
| %gp2 = inttoptr <2 x i64> %b to <2 x ptr> |
| %el0 = extractelement <2 x ptr> %gp2, i32 0 |
| store i16 0, ptr %el0, align 2 |
| ret void |
| } |
| |
| ; Vector form of the global bit-32 XOR test: both lanes of a |
| ; <2 x ptr addrspace(1)> are cast to flat, XOR-ed with 0x100000000, and lane 0 |
| ; is used for the store. The CHECK lines expect the IR to be left unchanged. |
| define void @test_xor_global_vector(<2 x ptr addrspace(1)> %sp) { |
| ; CHECK-LABEL: define void @test_xor_global_vector( |
| ; CHECK-SAME: <2 x ptr addrspace(1)> [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast <2 x ptr addrspace(1)> [[SP]] to <2 x ptr> |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint <2 x ptr> [[GP]] to <2 x i64> |
| ; CHECK-NEXT: [[B:%.*]] = xor <2 x i64> [[A]], splat (i64 4294967296) |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr <2 x i64> [[B]] to <2 x ptr> |
| ; CHECK-NEXT: [[EL0:%.*]] = extractelement <2 x ptr> [[GP2]], i32 0 |
| ; CHECK-NEXT: store i16 0, ptr [[EL0]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast <2 x ptr addrspace(1)> %sp to <2 x ptr> |
| %a = ptrtoint <2 x ptr> %gp to <2 x i64> |
| ; Vector case - not currently optimized by InferAddressSpaces |
| %b = xor <2 x i64> %a, splat (i64 4294967296) |
| %gp2 = inttoptr <2 x i64> %b to <2 x ptr> |
| %el0 = extractelement <2 x ptr> %gp2, i32 0 |
| store i16 0, ptr %el0, align 2 |
| ret void |
| } |
| |
| ; Vector form of the private XOR test: both lanes of a <2 x ptr addrspace(5)> |
| ; are cast to flat, XOR-ed with 255 (0xFF), and lane 0 is used for the store. |
| ; The CHECK lines expect the IR to be left unchanged. |
| define void @test_xor_private_vector(<2 x ptr addrspace(5)> %sp) { |
| ; CHECK-LABEL: define void @test_xor_private_vector( |
| ; CHECK-SAME: <2 x ptr addrspace(5)> [[SP:%.*]]) { |
| ; CHECK-NEXT: [[GP:%.*]] = addrspacecast <2 x ptr addrspace(5)> [[SP]] to <2 x ptr> |
| ; CHECK-NEXT: [[A:%.*]] = ptrtoint <2 x ptr> [[GP]] to <2 x i64> |
| ; CHECK-NEXT: [[B:%.*]] = xor <2 x i64> [[A]], splat (i64 255) |
| ; CHECK-NEXT: [[GP2:%.*]] = inttoptr <2 x i64> [[B]] to <2 x ptr> |
| ; CHECK-NEXT: [[EL0:%.*]] = extractelement <2 x ptr> [[GP2]], i32 0 |
| ; CHECK-NEXT: store i16 0, ptr [[EL0]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %gp = addrspacecast <2 x ptr addrspace(5)> %sp to <2 x ptr> |
| %a = ptrtoint <2 x ptr> %gp to <2 x i64> |
| ; Vector case - not currently optimized by InferAddressSpaces |
| %b = xor <2 x i64> %a, splat (i64 255) |
| %gp2 = inttoptr <2 x i64> %b to <2 x ptr> |
| %el0 = extractelement <2 x ptr> %gp2, i32 0 |
| store i16 0, ptr %el0, align 2 |
| ret void |
| } |