blob: 90877be255e0f44a70d293b8afb68237cd92af2a [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S < %s | FileCheck %s
; Aggregate of 256 <2 x i64> elements (256 * 16 = 4096 bytes), matching the
; 4096-byte memcpy length used by @func and @func_phi_loop.
%struct.type = type { [256 x <2 x i64>] }
; External addrspace(3) global; the tests load a pointer from byte offset 32 of it.
@g1 = external hidden addrspace(3) global %struct.type, align 16
; This test requires the PtrReplacer to replace users in an RPO traversal.
; Furthermore, %ptr.else does not need to be replaced, so it must be retained
; in %ptr.sink.
;
; %coerce is an addrspace(5) alloca whose only initialization is a 4096-byte
; memcpy from the byref addrspace(4) argument %0. The CHECK lines expect the
; alloca and the memcpy to disappear: on the if.then path %val.then becomes an
; addrspacecast of %0 itself, while on the if.else path the pointer loaded from
; @g1 flows into the phi unchanged.
define <2 x i64> @func(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
; CHECK-LABEL: define <2 x i64> @func(
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[CMP_0]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; CHECK: [[IF_THEN]]:
; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
; CHECK-NEXT: br label %[[SINK:.*]]
; CHECK: [[IF_ELSE]]:
; CHECK-NEXT: [[PTR_ELSE:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
; CHECK-NEXT: br label %[[SINK]]
; CHECK: [[SINK]]:
; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr [ [[PTR_ELSE]], %[[IF_ELSE]] ], [ [[VAL_THEN]], %[[IF_THEN]] ]
; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_SINK]], align 16
; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]]
;
entry:
; Local copy of the byref argument; it is never written after the memcpy.
%coerce = alloca %struct.type, align 16, addrspace(5)
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
br i1 %cmp.0, label %if.then, label %if.else
if.then: ; preds = %entry
; Zero-offset GEP plus addrspacecast of the copy: the chain that must be
; rewritten to use %0.
%ptr.then = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
%val.then = addrspacecast ptr addrspace(5) %ptr.then to ptr
br label %sink
if.else: ; preds = %entry
; Pointer that does not derive from %coerce; it must reach the phi as-is.
%ptr.else = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
%val.else = getelementptr inbounds nuw i8, ptr %ptr.else, i64 0
br label %sink
sink:
; Merges one incoming value derived from %coerce with one that is not.
%ptr.sink = phi ptr [ %val.else, %if.else ], [ %val.then, %if.then ]
%val.sink = load <2 x i64>, ptr %ptr.sink, align 16
ret <2 x i64> %val.sink
}
; Loop variant: the pointer derived from the %coerce copy enters %ptr.phi on
; the edge from entry, while the back edge carries a pointer loaded from @g1.
; The CHECK lines expect the alloca and memcpy to be removed, %val.0 to become
; an addrspacecast of %0, and the phi to keep the loaded pointer on its back
; edge.
define <2 x i64> @func_phi_loop(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
; CHECK-LABEL: define <2 x i64> @func_phi_loop(
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[VAL_0:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[PTR_PHI_R:%.*]] = phi ptr [ [[PTR_1:%.*]], %[[LOOP]] ], [ [[VAL_0]], %[[ENTRY]] ]
; CHECK-NEXT: [[PTR_1]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
; CHECK-NEXT: br i1 [[CMP_0]], label %[[LOOP]], label %[[SINK:.*]]
; CHECK: [[SINK]]:
; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_PHI_R]], align 16
; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]]
;
entry:
; Local copy of the byref argument, initialized only by the memcpy below.
%coerce = alloca %struct.type, align 16, addrspace(5)
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
%ptr.0 = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
%val.0 = addrspacecast ptr addrspace(5) %ptr.0 to ptr
br label %loop
loop:
; The phi uses %val.1, which is defined later in this same block (back edge).
%ptr.phi = phi ptr [ %val.1, %loop ], [ %val.0, %entry ]
%ptr.1 = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
%val.1 = getelementptr inbounds nuw i8, ptr %ptr.1, i64 0
br i1 %cmp.0, label %loop, label %sink
sink:
%val.sink = load <2 x i64>, ptr %ptr.phi, align 16
ret <2 x i64> %val.sink
}
; Crashed in the InstCombine PtrReplacer because an invalid select was generated
; with one addrspace(4) operand and one addrspace(5) operand.
;
; The select condition is the constant false, so %select evaluates to %alloca,
; and both memcpy calls have length 0. The expected output is a bare `ret void`.
define amdgpu_kernel void @select_addr4_addr5(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5(
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: ret void
;
bb:
; %alloca has an element count of 0; %alloca1 is the copy of the byref argument.
%alloca = alloca i32, i32 0, align 8, addrspace(5)
%alloca1 = alloca [12 x i8], align 16, addrspace(5)
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
; Only the true operand (%alloca1) is fed by the memcpy from addrspace(4).
%select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca
call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
ret void
}
; Same as @select_addr4_addr5 but with the select operands swapped: the copy of
; the byref argument (%alloca1) is now the false operand, so the constant-false
; select evaluates to %alloca1. The expected output is again a bare `ret void`.
define amdgpu_kernel void @select_addr4_addr5_swapped(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5_swapped(
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: ret void
;
bb:
; %alloca has an element count of 0; %alloca1 is the copy of the byref argument.
%alloca = alloca i32, i32 0, align 8, addrspace(5)
%alloca1 = alloca [12 x i8], align 16, addrspace(5)
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
%select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1
call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
ret void
}
; External addrspace(1) constant used as the memcpy source in @issue160302.
@global = external addrspace(1) constant [16 x float], align 64
; Regression test named after the issue it reproduces (presumably LLVM issue
; #160302 -- confirm against the bug tracker).
;
; The select mixes a pointer derived from a local alloca (the destination of a
; zero-length memcpy from @global) with a pointer derived from the unrelated
; argument %arg. The CHECK lines expect the alloca to be kept and the select to
; stay on addrspace(5) pointers; only the memcpy and the zero-index GEPs go away.
define float @issue160302(i1 %cond, ptr addrspace(5) %arg) {
; CHECK-LABEL: define float @issue160302(
; CHECK-SAME: i1 [[COND:%.*]], ptr addrspace(5) [[ARG:%.*]]) {
; CHECK-NEXT: [[AGG_TMP2_I4:%.*]] = alloca [16 x float], align 64, addrspace(5)
; CHECK-NEXT: [[SELECT_PTR:%.*]] = select i1 [[COND]], ptr addrspace(5) [[AGG_TMP2_I4]], ptr addrspace(5) [[ARG]]
; CHECK-NEXT: [[COND_I:%.*]] = load float, ptr addrspace(5) [[SELECT_PTR]], align 4
; CHECK-NEXT: ret float [[COND_I]]
;
%agg.tmp2.i4 = alloca [16 x float], align 64, addrspace(5)
; Zero-length copy from the addrspace(1) constant into the alloca.
call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) %agg.tmp2.i4, ptr addrspace(1) @global, i64 0, i1 false)
; Both GEPs use all-zero indices, so each yields its base pointer.
%m_Data.i14.i = getelementptr [16 x float], ptr addrspace(5) %agg.tmp2.i4, i32 0, i32 0
%gep = getelementptr [16 x float], ptr addrspace(5) %arg, i32 0, i32 0
; One operand derives from the alloca, the other from the argument.
%select.ptr = select i1 %cond, ptr addrspace(5) %m_Data.i14.i, ptr addrspace(5) %gep
%cond.i = load float, ptr addrspace(5) %select.ptr, align 4
ret float %cond.i
}
; memcpy from an addrspace(4) source into an addrspace(5) destination with an
; i64 length; called by @func, @func_phi_loop and both select_addr4_addr5 tests.
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0