llvm/test/CodeGen/PowerPC/sms-store-dependence.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
 ; RUN:       -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s

 ; Test that the pipeliner schedules the store instructions correctly. Since
 ; there is a dependence between the store, they cannot be scheduled further than
 ; MII cycles/instructions apart. That is, the first store cannot occur multiple
 ; times before the second ctore in the schedule.
 define dso_local void @comp_method(ptr noalias nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef writeonly %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6, i64 %v1) local_unnamed_addr {
 ; CHECK-LABEL: comp_method:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    extsw 7, 8
 ; CHECK-NEXT:    extsw 8, 9
 ; CHECK-NEXT:    clrldi 9, 6, 32
 ; CHECK-NEXT:    addi 6, 3, -1
 ; CHECK-NEXT:    mtctr 9
 ; CHECK-NEXT:    li 11, 0
 ; CHECK-NEXT:    sradi 12, 11, 2
 ; CHECK-NEXT:    add 5, 5, 8
 ; CHECK-NEXT:    li 8, 2
 ; CHECK-NEXT:    li 3, 8
 ; CHECK-NEXT:    addi 11, 7, 0
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    lbzu 9, 1(6)
 ; CHECK-NEXT:    add 12, 12, 10
 ; CHECK-NEXT:    extsb 9, 9
 ; CHECK-NEXT:    stbx 8, 4, 9
 ; CHECK-NEXT:    add 9, 9, 12
 ; CHECK-NEXT:    bdz .LBB0_2
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1:
 ; CHECK-NEXT:    lbzu 0, 1(6)
 ; CHECK-NEXT:    sradi 12, 11, 2
 ; CHECK-NEXT:    add 11, 11, 7
 ; CHECK-NEXT:    add 12, 12, 10
 ; CHECK-NEXT:    sldi 30, 9, 2
 ; CHECK-NEXT:    add 9, 9, 30
 ; CHECK-NEXT:    extsb 0, 0
 ; CHECK-NEXT:    stbx 3, 5, 9
 ; CHECK-NEXT:    add 9, 0, 12
 ; CHECK-NEXT:    stbx 8, 4, 0
 ; CHECK-NEXT:    bdnz .LBB0_1
 ; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    sldi 4, 9, 2
 ; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    add 4, 9, 4
 ; CHECK-NEXT:    stbx 3, 5, 4
 ; CHECK-NEXT:    blr
   %8 = icmp sgt i32 %3, 64
   tail call void @llvm.assume(i1 %8)
   %9 = and i32 %3, 1
   %10 = icmp eq i32 %9, 0
   tail call void @llvm.assume(i1 %10)
   %11 = sext i32 %5 to i64
   %12 = sext i32 %6 to i64
   %13 = zext nneg i32 %3 to i64
   %14 = getelementptr i8, ptr %2, i64 %12
   br label %16

 15:
   ret void

 16:
   %17 = phi i64 [ 0, %7 ], [ %24, %16 ]
   %18 = getelementptr inbounds i8, ptr %0, i64 %17
   %19 = load i8, ptr %18, align 1
   %20 = sext i8 %19 to i64
   %21 = getelementptr inbounds i8, ptr %1, i64 %20
   store i8 2, ptr %21, align 1
   %22 = mul nsw i64 %17, %11
   %a1 = ashr i64 %22, 2
   %a2 = add i64 %a1, %v1
   %a3 = add i64 %20, %a2
   %a4 = mul nsw i64 %a3, 5
   %23 = getelementptr i8, ptr %14, i64 %a4
   store i8 8, ptr %23, align 1
   %24 = add nuw nsw i64 %17, 1
   %25 = icmp eq i64 %24, %13
   br i1 %25, label %15, label %16
 }

 declare void @llvm.assume(i1 noundef) #1

 attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
	; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
	; RUN: -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 \| FileCheck %s

	; Test that the pipeliner schedules the store instructions correctly. Since
	; there is a dependence between the store, they cannot be scheduled further than
	; MII cycles/instructions apart. That is, the first store cannot occur multiple
	; times before the second ctore in the schedule.
	define dso_local void @comp_method(ptr noalias nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef writeonly %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6, i64 %v1) local_unnamed_addr {
	; CHECK-LABEL: comp_method:
	; CHECK: # %bb.0:
	; CHECK-NEXT: extsw 7, 8
	; CHECK-NEXT: extsw 8, 9
	; CHECK-NEXT: clrldi 9, 6, 32
	; CHECK-NEXT: addi 6, 3, -1
	; CHECK-NEXT: mtctr 9
	; CHECK-NEXT: li 11, 0
	; CHECK-NEXT: sradi 12, 11, 2
	; CHECK-NEXT: add 5, 5, 8
	; CHECK-NEXT: li 8, 2
	; CHECK-NEXT: li 3, 8
	; CHECK-NEXT: addi 11, 7, 0
	; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
	; CHECK-NEXT: lbzu 9, 1(6)
	; CHECK-NEXT: add 12, 12, 10
	; CHECK-NEXT: extsb 9, 9
	; CHECK-NEXT: stbx 8, 4, 9
	; CHECK-NEXT: add 9, 9, 12
	; CHECK-NEXT: bdz .LBB0_2
	; CHECK-NEXT: .p2align 4
	; CHECK-NEXT: .LBB0_1:
	; CHECK-NEXT: lbzu 0, 1(6)
	; CHECK-NEXT: sradi 12, 11, 2
	; CHECK-NEXT: add 11, 11, 7
	; CHECK-NEXT: add 12, 12, 10
	; CHECK-NEXT: sldi 30, 9, 2
	; CHECK-NEXT: add 9, 9, 30
	; CHECK-NEXT: extsb 0, 0
	; CHECK-NEXT: stbx 3, 5, 9
	; CHECK-NEXT: add 9, 0, 12
	; CHECK-NEXT: stbx 8, 4, 0
	; CHECK-NEXT: bdnz .LBB0_1
	; CHECK-NEXT: .LBB0_2:
	; CHECK-NEXT: sldi 4, 9, 2
	; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
	; CHECK-NEXT: add 4, 9, 4
	; CHECK-NEXT: stbx 3, 5, 4
	; CHECK-NEXT: blr
	%8 = icmp sgt i32 %3, 64
	tail call void @llvm.assume(i1 %8)
	%9 = and i32 %3, 1
	%10 = icmp eq i32 %9, 0
	tail call void @llvm.assume(i1 %10)
	%11 = sext i32 %5 to i64
	%12 = sext i32 %6 to i64
	%13 = zext nneg i32 %3 to i64
	%14 = getelementptr i8, ptr %2, i64 %12
	br label %16

	15:
	ret void

	16:
	%17 = phi i64 [ 0, %7 ], [ %24, %16 ]
	%18 = getelementptr inbounds i8, ptr %0, i64 %17
	%19 = load i8, ptr %18, align 1
	%20 = sext i8 %19 to i64
	%21 = getelementptr inbounds i8, ptr %1, i64 %20
	store i8 2, ptr %21, align 1
	%22 = mul nsw i64 %17, %11
	%a1 = ashr i64 %22, 2
	%a2 = add i64 %a1, %v1
	%a3 = add i64 %20, %a2
	%a4 = mul nsw i64 %a3, 5
	%23 = getelementptr i8, ptr %14, i64 %a4
	store i8 8, ptr %23, align 1
	%24 = add nuw nsw i64 %17, 1
	%25 = icmp eq i64 %24, %13
	br i1 %25, label %15, label %16
	}

	declare void @llvm.assume(i1 noundef) #1

	attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }