| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX9 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX1250 %s |
| |
| ; Two scalar llvm.exp2.f32 calls on adjacent floats, arg[tid] and arg[tid + 1], |
| ; each loaded, transformed and stored back in place. |
| ; Per the assertions below, the gfx900 run keeps both scalar load/call/store |
| ; sequences, while the gfx1250 run expects a single <2 x float> load, one |
| ; llvm.exp2.v2f32 call and a single <2 x float> store. |
| define amdgpu_kernel void @exp2_combine(ptr addrspace(1) %arg) { |
| ; GFX9-LABEL: define amdgpu_kernel void @exp2_combine( |
| ; GFX9-SAME: ptr addrspace(1) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; GFX9-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX9-NEXT: [[IDX:%.*]] = zext i32 [[TID]] to i64 |
| ; GFX9-NEXT: [[PTR0:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARG]], i64 [[IDX]] |
| ; GFX9-NEXT: [[VAL0:%.*]] = load float, ptr addrspace(1) [[PTR0]], align 4 |
| ; GFX9-NEXT: [[EXP0:%.*]] = call float @llvm.exp2.f32(float [[VAL0]]) |
| ; GFX9-NEXT: store float [[EXP0]], ptr addrspace(1) [[PTR0]], align 4 |
| ; GFX9-NEXT: [[IDX1:%.*]] = add nuw nsw i64 [[IDX]], 1 |
| ; GFX9-NEXT: [[PTR1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARG]], i64 [[IDX1]] |
| ; GFX9-NEXT: [[VAL1:%.*]] = load float, ptr addrspace(1) [[PTR1]], align 4 |
| ; GFX9-NEXT: [[EXP1:%.*]] = call float @llvm.exp2.f32(float [[VAL1]]) |
| ; GFX9-NEXT: store float [[EXP1]], ptr addrspace(1) [[PTR1]], align 4 |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX1250-LABEL: define amdgpu_kernel void @exp2_combine( |
| ; GFX1250-SAME: ptr addrspace(1) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; GFX1250-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX1250-NEXT: [[IDX:%.*]] = zext i32 [[TID]] to i64 |
| ; GFX1250-NEXT: [[PTR0:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARG]], i64 [[IDX]] |
| ; GFX1250-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR0]], align 4 |
| ; GFX1250-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP1]]) |
| ; GFX1250-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[PTR0]], align 4 |
| ; GFX1250-NEXT: ret void |
| ; |
| ; Lane 0 - the float at index tid (work-item id, zero-extended to i64). |
| %tid = tail call i32 @llvm.amdgcn.workitem.id.x() |
| %idx = zext i32 %tid to i64 |
| %ptr0 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %idx |
| %val0 = load float, ptr addrspace(1) %ptr0, align 4 |
| %exp0 = call float @llvm.exp2.f32(float %val0) |
| store float %exp0, ptr addrspace(1) %ptr0, align 4 |
| ; Lane 1 - the next float, at index tid + 1, processed the same way. |
| %idx1 = add nuw nsw i64 %idx, 1 |
| %ptr1 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %idx1 |
| %val1 = load float, ptr addrspace(1) %ptr1, align 4 |
| %exp1 = call float @llvm.exp2.f32(float %val1) |
| store float %exp1, ptr addrspace(1) %ptr1, align 4 |
| ret void |
| } |
| |
| ; Same shape as a two-lane in-place update, but using llvm.exp.f32 calls that |
| ; carry the afn fast-math flag, applied to arg[tid] and arg[tid + 1]. |
| ; Per the assertions below, the gfx900 run keeps both scalar afn calls, while |
| ; the gfx1250 run expects one <2 x float> load, a single afn llvm.exp.v2f32 |
| ; call (the flag is retained on the vector call) and one <2 x float> store. |
| define amdgpu_kernel void @exp_afn_combine(ptr addrspace(1) %arg) { |
| ; GFX9-LABEL: define amdgpu_kernel void @exp_afn_combine( |
| ; GFX9-SAME: ptr addrspace(1) [[ARG:%.*]]) #[[ATTR0]] { |
| ; GFX9-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX9-NEXT: [[IDX:%.*]] = zext i32 [[TID]] to i64 |
| ; GFX9-NEXT: [[PTR0:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARG]], i64 [[IDX]] |
| ; GFX9-NEXT: [[VAL0:%.*]] = load float, ptr addrspace(1) [[PTR0]], align 4 |
| ; GFX9-NEXT: [[EXP0:%.*]] = call afn float @llvm.exp.f32(float [[VAL0]]) |
| ; GFX9-NEXT: store float [[EXP0]], ptr addrspace(1) [[PTR0]], align 4 |
| ; GFX9-NEXT: [[IDX1:%.*]] = add nuw nsw i64 [[IDX]], 1 |
| ; GFX9-NEXT: [[PTR1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARG]], i64 [[IDX1]] |
| ; GFX9-NEXT: [[VAL1:%.*]] = load float, ptr addrspace(1) [[PTR1]], align 4 |
| ; GFX9-NEXT: [[EXP1:%.*]] = call afn float @llvm.exp.f32(float [[VAL1]]) |
| ; GFX9-NEXT: store float [[EXP1]], ptr addrspace(1) [[PTR1]], align 4 |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX1250-LABEL: define amdgpu_kernel void @exp_afn_combine( |
| ; GFX1250-SAME: ptr addrspace(1) [[ARG:%.*]]) #[[ATTR0]] { |
| ; GFX1250-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX1250-NEXT: [[IDX:%.*]] = zext i32 [[TID]] to i64 |
| ; GFX1250-NEXT: [[PTR0:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARG]], i64 [[IDX]] |
| ; GFX1250-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR0]], align 4 |
| ; GFX1250-NEXT: [[TMP2:%.*]] = call afn <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]]) |
| ; GFX1250-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[PTR0]], align 4 |
| ; GFX1250-NEXT: ret void |
| ; |
| ; Lane 0 - the float at index tid (work-item id, zero-extended to i64). |
| %tid = tail call i32 @llvm.amdgcn.workitem.id.x() |
| %idx = zext i32 %tid to i64 |
| %ptr0 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %idx |
| %val0 = load float, ptr addrspace(1) %ptr0, align 4 |
| %exp0 = call afn float @llvm.exp.f32(float %val0) |
| store float %exp0, ptr addrspace(1) %ptr0, align 4 |
| ; Lane 1 - the next float, at index tid + 1, processed the same way. |
| %idx1 = add nuw nsw i64 %idx, 1 |
| %ptr1 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %idx1 |
| %val1 = load float, ptr addrspace(1) %ptr1, align 4 |
| %exp1 = call afn float @llvm.exp.f32(float %val1) |
| store float %exp1, ptr addrspace(1) %ptr1, align 4 |
| ret void |
| } |