| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -passes=slp-vectorizer \ |
| ; RUN: < %s | FileCheck -check-prefix=GFX950 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -passes=slp-vectorizer \ |
| ; RUN: -slp-inst-count-check=true < %s | FileCheck -check-prefix=GFX950 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=slp-vectorizer \ |
| ; RUN: < %s | FileCheck -check-prefix=GFX942 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=slp-vectorizer \ |
| ; RUN: < %s | FileCheck -check-prefix=GFX906 %s |
| |
| ; SLP-vectorizer test over a loop that carries five i32 values x0..x4 through |
| ; phi nodes. On every back edge the values rotate by one position (x0 takes x4, |
| ; x1 takes x0, x2 takes x1, x3 takes x2, x4 takes x3). Each iteration stores x0 |
| ; to out[i] and x1 to out[i | 1], then advances i by 2 while i.next is |
| ; unsigned-less-than n. |
| ; |
| ; Expected output per RUN configuration (see the check lines below): |
| ;   - gfx950 and gfx942 runs: the scalar phis and the two scalar stores are |
| ;     replaced by <2 x i32> phis, one <2 x i32> store and a shufflevector. |
| ;   - gfx906 run: the loop is left in its original scalar form. |
| ; |
| ; Only check prefixes that are named by a RUN line are kept here; check lines |
| ; under any other prefix are never evaluated by FileCheck. |
| define amdgpu_kernel void @phi5_rotate( |
| ; GFX950-LABEL: define amdgpu_kernel void @phi5_rotate( |
| ; GFX950-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; GFX950-NEXT: [[ENTRY:.*]]: |
| ; GFX950-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[S0]], i32 0 |
| ; GFX950-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[S1]], i32 1 |
| ; GFX950-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[S4]], i32 0 |
| ; GFX950-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[S0]], i32 1 |
| ; GFX950-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[S3]], i32 0 |
| ; GFX950-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[S4]], i32 1 |
| ; GFX950-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[S2]], i32 0 |
| ; GFX950-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[S3]], i32 1 |
| ; GFX950-NEXT: br label %[[LOOP:.*]] |
| ; GFX950: [[LOOP]]: |
| ; GFX950-NEXT: [[I1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] |
| ; GFX950-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP9:%.*]], %[[LOOP]] ] |
| ; GFX950-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ] |
| ; GFX950-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP11:%.*]], %[[LOOP]] ] |
| ; GFX950-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP12:%.*]], %[[LOOP]] ] |
| ; GFX950-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]] |
| ; GFX950-NEXT: store <2 x i32> [[TMP8]], ptr addrspace(1) [[GEP1]], align 4 |
| ; GFX950-NEXT: [[I_NEXT]] = add nuw i32 [[I1]], 2 |
| ; GFX950-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] |
| ; GFX950-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP11]], <2 x i32> <i32 1, i32 2> |
| ; GFX950-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; GFX950: [[EXIT]]: |
| ; GFX950-NEXT: ret void |
| ; |
| ; GFX942-LABEL: define amdgpu_kernel void @phi5_rotate( |
| ; GFX942-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; GFX942-NEXT: [[ENTRY:.*]]: |
| ; GFX942-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[S0]], i32 0 |
| ; GFX942-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[S1]], i32 1 |
| ; GFX942-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[S4]], i32 0 |
| ; GFX942-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[S0]], i32 1 |
| ; GFX942-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[S3]], i32 0 |
| ; GFX942-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[S4]], i32 1 |
| ; GFX942-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[S2]], i32 0 |
| ; GFX942-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[S3]], i32 1 |
| ; GFX942-NEXT: br label %[[LOOP:.*]] |
| ; GFX942: [[LOOP]]: |
| ; GFX942-NEXT: [[I1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] |
| ; GFX942-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP9:%.*]], %[[LOOP]] ] |
| ; GFX942-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ] |
| ; GFX942-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP11:%.*]], %[[LOOP]] ] |
| ; GFX942-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP12:%.*]], %[[LOOP]] ] |
| ; GFX942-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]] |
| ; GFX942-NEXT: store <2 x i32> [[TMP8]], ptr addrspace(1) [[GEP1]], align 4 |
| ; GFX942-NEXT: [[I_NEXT]] = add nuw i32 [[I1]], 2 |
| ; GFX942-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] |
| ; GFX942-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP11]], <2 x i32> <i32 1, i32 2> |
| ; GFX942-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; GFX942: [[EXIT]]: |
| ; GFX942-NEXT: ret void |
| ; |
| ; GFX906-LABEL: define amdgpu_kernel void @phi5_rotate( |
| ; GFX906-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; GFX906-NEXT: [[ENTRY:.*]]: |
| ; GFX906-NEXT: br label %[[LOOP:.*]] |
| ; GFX906: [[LOOP]]: |
| ; GFX906-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] |
| ; GFX906-NEXT: [[X0:%.*]] = phi i32 [ [[S0]], %[[ENTRY]] ], [ [[X4:%.*]], %[[LOOP]] ] |
| ; GFX906-NEXT: [[X1:%.*]] = phi i32 [ [[S1]], %[[ENTRY]] ], [ [[X0]], %[[LOOP]] ] |
| ; GFX906-NEXT: [[X2:%.*]] = phi i32 [ [[S2]], %[[ENTRY]] ], [ [[X1]], %[[LOOP]] ] |
| ; GFX906-NEXT: [[X3:%.*]] = phi i32 [ [[S3]], %[[ENTRY]] ], [ [[X2]], %[[LOOP]] ] |
| ; GFX906-NEXT: [[X4]] = phi i32 [ [[S4]], %[[ENTRY]] ], [ [[X3]], %[[LOOP]] ] |
| ; GFX906-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I]] |
| ; GFX906-NEXT: store i32 [[X0]], ptr addrspace(1) [[GEP0]], align 4 |
| ; GFX906-NEXT: [[I1:%.*]] = or disjoint i32 [[I]], 1 |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]] |
| ; GFX906-NEXT: store i32 [[X1]], ptr addrspace(1) [[GEP1]], align 4 |
| ; GFX906-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 2 |
| ; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] |
| ; GFX906-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; GFX906: [[EXIT]]: |
| ; GFX906-NEXT: ret void |
| ; |
| ptr addrspace(1) nocapture %out, |
| i32 %n, |
| i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4) { |
| entry: |
| br label %loop |
| |
| loop: |
| %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] |
| %x0 = phi i32 [ %s0, %entry ], [ %x4, %loop ] |
| %x1 = phi i32 [ %s1, %entry ], [ %x0, %loop ] |
| %x2 = phi i32 [ %s2, %entry ], [ %x1, %loop ] |
| %x3 = phi i32 [ %s3, %entry ], [ %x2, %loop ] |
| %x4 = phi i32 [ %s4, %entry ], [ %x3, %loop ] |
| %gep0 = getelementptr i32, ptr addrspace(1) %out, i32 %i |
| store i32 %x0, ptr addrspace(1) %gep0, align 4 |
| %i1 = or disjoint i32 %i, 1 |
| %gep1 = getelementptr i32, ptr addrspace(1) %out, i32 %i1 |
| store i32 %x1, ptr addrspace(1) %gep1, align 4 |
| %i.next = add nuw i32 %i, 2 |
| %cmp = icmp ult i32 %i.next, %n |
| br i1 %cmp, label %loop, label %exit |
| |
| exit: |
| ret void |
| } |