; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -passes=slp-vectorizer \
; RUN: < %s | FileCheck -check-prefix=GFX950 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -passes=slp-vectorizer \
; RUN: -slp-inst-count-check=true < %s | FileCheck -check-prefix=GFX950 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=slp-vectorizer \
; RUN: < %s | FileCheck -check-prefix=GFX942 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=slp-vectorizer \
; RUN: < %s | FileCheck -check-prefix=GFX906 %s
; Five scalar i32 PHIs form a rotation across the loop back-edge
; (x0 <- x4, x1 <- x0, x2 <- x1, x3 <- x2, x4 <- x3), and each iteration
; stores x0 and x1 to the adjacent elements out[i] and out[i|1].
; Expectations per enabled prefix:
;   GFX950 / GFX942: the PHIs and the store pair are rewritten on <2 x i32>.
;   GFX906:          the scalar form is left unchanged.
define amdgpu_kernel void @phi5_rotate(
; GFX950-LABEL: define amdgpu_kernel void @phi5_rotate(
; GFX950-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX950-NEXT: [[ENTRY:.*]]:
; GFX950-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[S0]], i32 0
; GFX950-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[S1]], i32 1
; GFX950-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[S4]], i32 0
; GFX950-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[S0]], i32 1
; GFX950-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[S3]], i32 0
; GFX950-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[S4]], i32 1
; GFX950-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[S2]], i32 0
; GFX950-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[S3]], i32 1
; GFX950-NEXT: br label %[[LOOP:.*]]
; GFX950: [[LOOP]]:
; GFX950-NEXT: [[I1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; GFX950-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP9:%.*]], %[[LOOP]] ]
; GFX950-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ]
; GFX950-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP11:%.*]], %[[LOOP]] ]
; GFX950-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP12:%.*]], %[[LOOP]] ]
; GFX950-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
; GFX950-NEXT: store <2 x i32> [[TMP8]], ptr addrspace(1) [[GEP1]], align 4
; GFX950-NEXT: [[I_NEXT]] = add nuw i32 [[I1]], 2
; GFX950-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
; GFX950-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP11]], <2 x i32> <i32 1, i32 2>
; GFX950-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; GFX950: [[EXIT]]:
; GFX950-NEXT: ret void
;
; GFX942-LABEL: define amdgpu_kernel void @phi5_rotate(
; GFX942-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX942-NEXT: [[ENTRY:.*]]:
; GFX942-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[S0]], i32 0
; GFX942-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[S1]], i32 1
; GFX942-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[S4]], i32 0
; GFX942-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[S0]], i32 1
; GFX942-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[S3]], i32 0
; GFX942-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[S4]], i32 1
; GFX942-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[S2]], i32 0
; GFX942-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[S3]], i32 1
; GFX942-NEXT: br label %[[LOOP:.*]]
; GFX942: [[LOOP]]:
; GFX942-NEXT: [[I1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; GFX942-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP9:%.*]], %[[LOOP]] ]
; GFX942-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ]
; GFX942-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP11:%.*]], %[[LOOP]] ]
; GFX942-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP12:%.*]], %[[LOOP]] ]
; GFX942-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
; GFX942-NEXT: store <2 x i32> [[TMP8]], ptr addrspace(1) [[GEP1]], align 4
; GFX942-NEXT: [[I_NEXT]] = add nuw i32 [[I1]], 2
; GFX942-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
; GFX942-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP11]], <2 x i32> <i32 1, i32 2>
; GFX942-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; GFX942: [[EXIT]]:
; GFX942-NEXT: ret void
;
; GFX906-LABEL: define amdgpu_kernel void @phi5_rotate(
; GFX906-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX906-NEXT: [[ENTRY:.*]]:
; GFX906-NEXT: br label %[[LOOP:.*]]
; GFX906: [[LOOP]]:
; GFX906-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; GFX906-NEXT: [[X0:%.*]] = phi i32 [ [[S0]], %[[ENTRY]] ], [ [[X4:%.*]], %[[LOOP]] ]
; GFX906-NEXT: [[X1:%.*]] = phi i32 [ [[S1]], %[[ENTRY]] ], [ [[X0]], %[[LOOP]] ]
; GFX906-NEXT: [[X2:%.*]] = phi i32 [ [[S2]], %[[ENTRY]] ], [ [[X1]], %[[LOOP]] ]
; GFX906-NEXT: [[X3:%.*]] = phi i32 [ [[S3]], %[[ENTRY]] ], [ [[X2]], %[[LOOP]] ]
; GFX906-NEXT: [[X4]] = phi i32 [ [[S4]], %[[ENTRY]] ], [ [[X3]], %[[LOOP]] ]
; GFX906-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I]]
; GFX906-NEXT: store i32 [[X0]], ptr addrspace(1) [[GEP0]], align 4
; GFX906-NEXT: [[I1:%.*]] = or disjoint i32 [[I]], 1
; GFX906-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
; GFX906-NEXT: store i32 [[X1]], ptr addrspace(1) [[GEP1]], align 4
; GFX906-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 2
; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
; GFX906-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; GFX906: [[EXIT]]:
; GFX906-NEXT: ret void
;
    ptr addrspace(1) nocapture %out,
    i32 %n,
    i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4) {
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  ; Rotation: each PHI takes the previous value of its predecessor in the ring.
  %x0 = phi i32 [ %s0, %entry ], [ %x4, %loop ]
  %x1 = phi i32 [ %s1, %entry ], [ %x0, %loop ]
  %x2 = phi i32 [ %s2, %entry ], [ %x1, %loop ]
  %x3 = phi i32 [ %s3, %entry ], [ %x2, %loop ]
  %x4 = phi i32 [ %s4, %entry ], [ %x3, %loop ]
  ; Only x0 and x1 are stored, to the adjacent elements out[i] and out[i|1].
  %gep0 = getelementptr i32, ptr addrspace(1) %out, i32 %i
  store i32 %x0, ptr addrspace(1) %gep0, align 4
  %i1 = or disjoint i32 %i, 1
  %gep1 = getelementptr i32, ptr addrspace(1) %out, i32 %i1
  store i32 %x1, ptr addrspace(1) %gep1, align 4
  %i.next = add nuw i32 %i, 2
  %cmp = icmp ult i32 %i.next, %n
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}