| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 | FileCheck %s |
| ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_40 | FileCheck %s -check-prefix=NOVECTOR |
| |
| ; @fusion: SLP-vectorizer input that processes two adjacent half elements. |
| ; Each element is loaded from %arg1, multiplied by 0xH5380 (60.0), has |
| ; 0xH57F0 (127.0) added to it (both operations carry the 'fast' flag), and is |
| ; stored at the same element index in %arg. |
| ; Expected output, per the two opt invocations at the top of the file: |
| ;   -mcpu=sm_70: the two scalar chains are merged into a single <2 x half> |
| ;                load / fmul / fadd / store sequence. |
| ;   -mcpu=sm_40: the scalar IR is left unchanged. |
| ; NOTE(review): presumably the difference comes from packed-half arithmetic |
| ; being available on sm_70 only -- confirm against the NVPTX cost model. |
| define void @fusion(ptr noalias nocapture align 256 dereferenceable(19267584) %arg, ptr noalias nocapture readonly align 256 dereferenceable(19267584) %arg1, i32 %arg2, i32 %arg3) local_unnamed_addr #0 { |
| ; CHECK-LABEL: @fusion( |
| ; CHECK-NEXT: [[TMP:%.*]] = shl nuw nsw i32 [[ARG2:%.*]], 6 |
| ; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP]], [[ARG3:%.*]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2 |
| ; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64 |
| ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[TMP11]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> [[TMP2]], <half 0xH5380, half 0xH5380> |
| ; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x half> [[TMP3]], <half 0xH57F0, half 0xH57F0> |
| ; CHECK-NEXT: store <2 x half> [[TMP4]], ptr [[TMP16]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; NOVECTOR-LABEL: @fusion( |
| ; NOVECTOR-NEXT: [[TMP:%.*]] = shl nuw nsw i32 [[ARG2:%.*]], 6 |
| ; NOVECTOR-NEXT: [[TMP4:%.*]] = or i32 [[TMP]], [[ARG3:%.*]] |
| ; NOVECTOR-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2 |
| ; NOVECTOR-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64 |
| ; NOVECTOR-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1 |
| ; NOVECTOR-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]] |
| ; NOVECTOR-NEXT: [[TMP12:%.*]] = load half, ptr [[TMP11]], align 8 |
| ; NOVECTOR-NEXT: [[TMP13:%.*]] = fmul fast half [[TMP12]], 0xH5380 |
| ; NOVECTOR-NEXT: [[TMP14:%.*]] = fadd fast half [[TMP13]], 0xH57F0 |
| ; NOVECTOR-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]] |
| ; NOVECTOR-NEXT: store half [[TMP14]], ptr [[TMP16]], align 8 |
| ; NOVECTOR-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, ptr [[ARG1]], i64 [[TMP7]] |
| ; NOVECTOR-NEXT: [[TMP18:%.*]] = load half, ptr [[TMP17]], align 2 |
| ; NOVECTOR-NEXT: [[TMP19:%.*]] = fmul fast half [[TMP18]], 0xH5380 |
| ; NOVECTOR-NEXT: [[TMP20:%.*]] = fadd fast half [[TMP19]], 0xH57F0 |
| ; NOVECTOR-NEXT: [[TMP21:%.*]] = getelementptr inbounds half, ptr [[ARG]], i64 [[TMP7]] |
| ; NOVECTOR-NEXT: store half [[TMP20]], ptr [[TMP21]], align 2 |
| ; NOVECTOR-NEXT: ret void |
| ; |
| ; Element index: ((%arg2 << 6) or %arg3) << 2, zero-extended to i64. |
| %tmp = shl nuw nsw i32 %arg2, 6 |
| %tmp4 = or i32 %tmp, %arg3 |
| %tmp5 = shl nuw nsw i32 %tmp4, 2 |
| %tmp6 = zext i32 %tmp5 to i64 |
| ; The shift by 2 leaves bit 0 of %tmp6 clear, so or-ing in 1 yields the index |
| ; of the immediately following element. |
| %tmp7 = or i64 %tmp6, 1 |
| ; First element (index %tmp6): load, scale, offset, store. |
| %tmp11 = getelementptr inbounds half, ptr %arg1, i64 %tmp6 |
| %tmp12 = load half, ptr %tmp11, align 8 |
| %tmp13 = fmul fast half %tmp12, 0xH5380 |
| %tmp14 = fadd fast half %tmp13, 0xH57F0 |
| %tmp16 = getelementptr inbounds half, ptr %arg, i64 %tmp6 |
| store half %tmp14, ptr %tmp16, align 8 |
| ; Second element (index %tmp7): the same operation chain as the first. |
| %tmp17 = getelementptr inbounds half, ptr %arg1, i64 %tmp7 |
| %tmp18 = load half, ptr %tmp17, align 2 |
| %tmp19 = fmul fast half %tmp18, 0xH5380 |
| %tmp20 = fadd fast half %tmp19, 0xH57F0 |
| %tmp21 = getelementptr inbounds half, ptr %arg, i64 %tmp7 |
| store half %tmp20, ptr %tmp21, align 2 |
| ret void |
| } |
| |
| ; Attribute group #0 is the one referenced by the definition of @fusion. |
| attributes #0 = { nounwind } |