; test/CodeGen/AArch64/constant-pool-partition.ll - llvm-project/llvm - Git at Google

 ; RUN: llc -mtriple=aarch64 -partition-static-data-sections \
 ; RUN:     -function-sections -unique-section-names=false \
 ; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always

 ; Repeat the RUN command above for big-endian systems.
 ; RUN: llc -mtriple=aarch64_be -partition-static-data-sections \
 ; RUN:     -function-sections -unique-section-names=false \
 ; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always

 ; Tests that constant pool hotness is aggregated across the module. The
 ; static-data-splitter processes data from cold_func first, unprofiled_func
 ; second, and then hot_func. Specifically, tests that
 ; - If a constant is accessed by hot functions, all constant pools for this
 ;   constant (e.g., from an unprofiled function, or cold function) should have
 ;   `.hot` suffix. For instance, double 0.68 is seen by both @cold_func and
 ;   @hot_func, so two CPI emits (under label LCPI0_0 and LCPI2_0) have `.hot`
 ;   suffix.
 ; - Similarly, if a constant is accessed by both a cold function and an
 ;   unprofiled function, constant pools for this constant should not have the
 ;   `.unlikely` suffix.

 ;; Constant pools for function @cold_func.
 ; CHECK:       .section	.rodata.cst8.hot.,"aM",@progbits,8
 ; CHECK-NEXT:     .p2align
 ; CHECK-NEXT:   .LCPI0_0:
 ; CHECK-NEXT:	    .xword	0x3fe5c28f5c28f5c3              // double 0.68000000000000005
 ; CHECK-NEXT: .section	.rodata.cst8.unlikely.,"aM",@progbits,8
 ; CHECK-NEXT:     .p2align
 ; CHECK-NEXT:   .LCPI0_1:
 ; CHECK-NEXT:     .xword 0x3fe5eb851eb851ec              // double 0.68500000000000005
 ; CHECK-NEXT:	.section	.rodata.cst8,"aM",@progbits,8
 ; CHECK-NEXT:     .p2align
 ; CHECK-NEXT:   .LCPI0_2:
 ; CHECK-NEXT:     .byte   0                               // 0x0
 ; CHECK-NEXT:     .byte   4                               // 0x4
 ; CHECK-NEXT:     .byte   8                               // 0x8
 ; CHECK-NEXT:     .byte   12                              // 0xc
 ; CHECK-NEXT:     .byte   255                             // 0xff
 ; CHECK-NEXT:     .byte   255                             // 0xff
 ; CHECK-NEXT:     .byte   255                             // 0xff
 ; CHECK-NEXT:     .byte   255                             // 0xff

 ;; Constant pools for function @unprofiled_func
 ; CHECK:	    .section	.rodata.cst8,"aM",@progbits,8
 ; CHECK-NEXT:     .p2align
 ; CHECK-NEXT:   .LCPI1_0:
 ; CHECK-NEXT:     .byte   0                               // 0x0
 ; CHECK-NEXT:     .byte   4                               // 0x4
 ; CHECK-NEXT:     .byte   8                               // 0x8
 ; CHECK-NEXT:     .byte   12                              // 0xc
 ; CHECK-NEXT:     .byte   255                             // 0xff
 ; CHECK-NEXT:     .byte   255                             // 0xff
 ; CHECK-NEXT:     .byte   255                             // 0xff
 ; CHECK-NEXT:     .byte   255                             // 0xff
 ; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16
 ; CHECK-NEXT:     .p2align
 ; CHECK-NEXT:   .LCPI1_1:
 ; CHECK-NEXT:     .word 2                                 // 0x2
 ; CHECK-NEXT:     .word 3                                 // 0x3
 ; CHECK-NEXT:     .word 5                                 // 0x5
 ; CHECK-NEXT:     .word 7                                 // 0x7
 ; CHECK-NEXT: .section        .rodata.cst16.hot.,"aM",@progbits,16
 ; CHECK-NEXT:     .p2align
 ; CHECK-NEXT:   .LCPI1_2:
 ; CHECK-NEXT:     .word   442                             // 0x1ba
 ; CHECK-NEXT:     .word   100                             // 0x64
 ; CHECK-NEXT:     .word   0                               // 0x0
 ; CHECK-NEXT:     .word   0                               // 0x0

 ;; Constant pools for function @hot_func
 ; CHECK:      .section        .rodata.cst8.hot.,"aM",@progbits,8
 ; CHECK-NEXT:     .p2align
 ; CHECK-NEXT:   .LCPI2_0:
 ; CHECK-NEXT:     .xword  0x3fe5c28f5c28f5c3              // double 0.68000000000000005
 ; CHECK-NEXT: .section        .rodata.cst16.hot.,"aM",@progbits,16
 ; CHECK-NEXT:     .p2align
 ; CHECK-NEXT:   .LCPI2_1:
 ; CHECK-NEXT:     .word   0                               // 0x0
 ; CHECK-NEXT:     .word   100                             // 0x64
 ; CHECK-NEXT:     .word   0                               // 0x0
 ; CHECK-NEXT:     .word   442                             // 0x1ba
 ; CHECK-NEXT:   .LCPI2_2:
 ; CHECK-NEXT:     .word   442                             // 0x1ba
 ; CHECK-NEXT:     .word   100                             // 0x64
 ; CHECK-NEXT:     .word   0                               // 0x0
 ; CHECK-NEXT:     .word   0                               // 0x0

 ;; For global variable @val
 ;; The section name remains `.rodata.cst32` without hotness prefix because
 ;; the variable has external linkage and is not analyzed. The compiler needs
 ;; symbolized data access profiles to annotate such global variables' hotness.
 ; CHECK:       .section	.rodata.cst32,"aM",@progbits,32
 ; CHECK-NEXT:  .globl	val

 ; Cold function: !16 gives it a function_entry_count of 1. It references three
 ; constants, which the FileCheck expectations for @cold_func earlier in this
 ; file look for as .LCPI0_0 (double 0.68), .LCPI0_1 (double 0.685) and
 ; .LCPI0_2 (the 8-byte tbl2 index vector). %x is accepted but not used.
 define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
   ; 0.68 is also passed by @hot_func, which is why its pool entry is expected
   ; in a `.hot` section even though this function is cold.
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
   ; 0.685 appears only in this function; expected in an `.unlikely` section.
   %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01)
   ; The same index vector is used by @unprofiled_func; expected in a section
   ; without a hotness suffix.
   %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
   %t2 = bitcast <8 x i8> %t1 to <2 x i32>
   ; Sum lane 1 of the lookup result with both call results.
   %3 = extractelement <2 x i32> %t2, i32 1
   %sum = add i32 %2, %3
   %ret = add i32 %sum, %num
   ret i32 %ret
 }

 ; NEON table-lookup intrinsic taking two <16 x i8> table operands; called with
 ; a constant index vector by @cold_func and @unprofiled_func.
 declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
 ; External variadic callee; @cold_func and @hot_func pass it double constants.
 declare i32 @func_taking_arbitrary_param(...)

 ; Function with no !prof attachment. Per the FileCheck expectations for
 ; @unprofiled_func earlier in this file, its three constants are emitted as
 ; .LCPI1_0 (tbl2 index vector, .rodata.cst8), .LCPI1_1 (<2, 3, 5, 7>,
 ; .rodata.cst16) and .LCPI1_2 (<442, 100, 0, 0>, .rodata.cst16.hot.).
 define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
   ; Same index vector as the tbl2 call in @cold_func.
   %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
   %t2 = bitcast <8 x i8> %t1 to <4 x i16>
   %t3 = zext <4 x i16> %t2 to <4 x i32>
   ; <2, 3, 5, 7> is referenced only by this function.
   %t4 = add <4 x i32> %t3, <i32 2, i32 3, i32 5, i32 7>
   ; <442, 100, 0, 0> is also used as a compare operand in @hot_func.
   %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t4
   ret <4 x i1> %cmp
 }

 ; Hot function: !17 gives it a function_entry_count of 100000. Per the
 ; FileCheck expectations for @hot_func earlier in this file, all of its
 ; constants are emitted in `.hot` sections: .LCPI2_0 (double 0.68), .LCPI2_1
 ; (<0, 100, 0, 442>) and .LCPI2_2 (<442, 100, 0, 0>). The i32 parameter %0 is
 ; not used in the body.
 define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
   ; Same double constant as the first call in @cold_func.
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
   ; <0, 100, 0, 442> is referenced only by this function.
   %b = add <4 x i32> <i32 0, i32 100, i32 0, i32 442>, %a
   ; Same vector constant as the compare in @unprofiled_func.
   %c = icmp ule <4 x i32> %b, <i32 442, i32 100, i32 0, i32 0>
   ret <4 x i1> %c
 }

 ; 256-bit constant with no explicit linkage keyword, loaded in @main. The
 ; FileCheck expectations earlier in this file require it to be emitted in plain
 ; `.rodata.cst32`, with no hotness marker in the section name.
 @val = unnamed_addr constant i256 1

 ; Driver: calls @hot_func in a loop and then @cold_func once. It carries !16
 ; (function_entry_count 1), the same attachment as @cold_func.
 ;
 ; Both call sites use the callees' defined signatures: @cold_func is called as
 ; returning i32, and @hot_func is called as <4 x i1> (i32, <4 x i32>) with its
 ; vector operand produced by @vector_func. Calling through a function type
 ; that differs from the definition would be undefined behavior.
 define i32 @main(i32 %0, ptr %1) !prof !16 {
   br label %7

 5:                                                ; preds = %7
   ; Loop exit: reached once, when the counter hits 100000.
   %x = call double @double_func()
   %a = call <16 x i8> @vector_func_16i8()
   %b = call <16 x i8> @vector_func_16i8()
   ; The result is named (not left implicit) so value numbering is unchanged.
   %cold_ret = call i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b)
   ret i32 0

 7:                                                ; preds = %7, %2
   ; %8 is the iteration counter; it is 0 on entry from the entry block %2.
   %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
   ; Load @val so that the global is referenced by code.
   %seed_val = load i256, ptr @val
   %9 = call i32 @seed(i256 %seed_val)
   %hot_vec = call <4 x i32> @vector_func()
   %hot_ret = call <4 x i1> @hot_func(i32 %9, <4 x i32> %hot_vec)
   %10 = add i32 %8, 1
   ; !18 weights the exit edge (to %5) 1 against 99999 for the back edge (to %7).
   %11 = icmp eq i32 %10, 100000
   br i1 %11, label %5, label %7, !prof !18
 }

 ; External functions declared for use by @main.
 declare i32 @seed(i256)
 declare double @double_func()
 declare <4 x i32> @vector_func()
 declare <16 x i8> @vector_func_16i8()

 ; Attach the profile summary (!1) to the module.
 !llvm.module.flags = !{!1}

 ; ProfileSummary module flag; !3 through !12 are its key/value fields.
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
 !3 = !{!"ProfileFormat", !"InstrProf"}
 !4 = !{!"TotalCount", i64 1460617}
 !5 = !{!"MaxCount", i64 849536}
 !6 = !{!"MaxInternalCount", i64 32769}
 !7 = !{!"MaxFunctionCount", i64 849536}
 !8 = !{!"NumCounts", i64 23784}
 !9 = !{!"NumFunctions", i64 3301}
 !10 = !{!"IsPartialProfile", i64 0}
 !11 = !{!"PartialProfileRatio", double 0.000000e+00}
 ; NOTE(review): each DetailedSummary entry is presumably a
 ; (cutoff, minimum count, number of counts) triple -- confirm against the LLVM
 ; profile summary documentation.
 !12 = !{!"DetailedSummary", !13}
 !13 = !{!14, !15}
 !14 = !{i32 990000, i64 166, i32 73}
 !15 = !{i32 999999, i64 3, i32 1463}
 ; Entry count attached to @cold_func and @main.
 !16 = !{!"function_entry_count", i64 1}
 ; Entry count attached to @hot_func.
 !17 = !{!"function_entry_count", i64 100000}
 ; Weights for the conditional branch at the end of the loop in @main.
 !18 = !{!"branch_weights", i32 1, i32 99999}
	; RUN: llc -mtriple=aarch64 -partition-static-data-sections \
	; RUN: -function-sections -unique-section-names=false \
	; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always

	; Repeat the RUN command above for big-endian systems.
	; RUN: llc -mtriple=aarch64_be -partition-static-data-sections \
	; RUN: -function-sections -unique-section-names=false \
	; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always

	; Tests that constant pool hotness is aggregated across the module. The
	; static-data-splitter processes data from cold_func first, unprofiled_func
	; second, and then hot_func. Specifically, tests that
	; - If a constant is accessed by hot functions, all constant pools for this
	; constant (e.g., from an unprofiled function, or cold function) should have
	; `.hot` suffix. For instance, double 0.68 is seen by both @cold_func and
	; @hot_func, so two CPI emits (under label LCPI0_0 and LCPI2_0) have `.hot`
	; suffix.
	; - Similarly, if a constant is accessed by both a cold function and an
	; unprofiled function, constant pools for this constant should not have the
	; `.unlikely` suffix.

	;; Constant pools for function @cold_func.
	; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8
	; CHECK-NEXT: .p2align
	; CHECK-NEXT: .LCPI0_0:
	; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
	; CHECK-NEXT: .section .rodata.cst8.unlikely.,"aM",@progbits,8
	; CHECK-NEXT: .p2align
	; CHECK-NEXT: .LCPI0_1:
	; CHECK-NEXT: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005
	; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8
	; CHECK-NEXT: .p2align
	; CHECK-NEXT: .LCPI0_2:
	; CHECK-NEXT: .byte 0 // 0x0
	; CHECK-NEXT: .byte 4 // 0x4
	; CHECK-NEXT: .byte 8 // 0x8
	; CHECK-NEXT: .byte 12 // 0xc
	; CHECK-NEXT: .byte 255 // 0xff
	; CHECK-NEXT: .byte 255 // 0xff
	; CHECK-NEXT: .byte 255 // 0xff
	; CHECK-NEXT: .byte 255 // 0xff

	;; Constant pools for function @unprofiled_func
	; CHECK: .section .rodata.cst8,"aM",@progbits,8
	; CHECK-NEXT: .p2align
	; CHECK-NEXT: .LCPI1_0:
	; CHECK-NEXT: .byte 0 // 0x0
	; CHECK-NEXT: .byte 4 // 0x4
	; CHECK-NEXT: .byte 8 // 0x8
	; CHECK-NEXT: .byte 12 // 0xc
	; CHECK-NEXT: .byte 255 // 0xff
	; CHECK-NEXT: .byte 255 // 0xff
	; CHECK-NEXT: .byte 255 // 0xff
	; CHECK-NEXT: .byte 255 // 0xff
	; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16
	; CHECK-NEXT: .p2align
	; CHECK-NEXT: .LCPI1_1:
	; CHECK-NEXT: .word 2 // 0x2
	; CHECK-NEXT: .word 3 // 0x3
	; CHECK-NEXT: .word 5 // 0x5
	; CHECK-NEXT: .word 7 // 0x7
	; CHECK-NEXT: .section .rodata.cst16.hot.,"aM",@progbits,16
	; CHECK-NEXT: .p2align
	; CHECK-NEXT: .LCPI1_2:
	; CHECK-NEXT: .word 442 // 0x1ba
	; CHECK-NEXT: .word 100 // 0x64
	; CHECK-NEXT: .word 0 // 0x0
	; CHECK-NEXT: .word 0 // 0x0

	;; Constant pools for function @hot_func
	; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8
	; CHECK-NEXT: .p2align
	; CHECK-NEXT: .LCPI2_0:
	; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
	; CHECK-NEXT: .section .rodata.cst16.hot.,"aM",@progbits,16
	; CHECK-NEXT: .p2align
	; CHECK-NEXT: .LCPI2_1:
	; CHECK-NEXT: .word 0 // 0x0
	; CHECK-NEXT: .word 100 // 0x64
	; CHECK-NEXT: .word 0 // 0x0
	; CHECK-NEXT: .word 442 // 0x1ba
	; CHECK-NEXT: .LCPI2_2:
	; CHECK-NEXT: .word 442 // 0x1ba
	; CHECK-NEXT: .word 100 // 0x64
	; CHECK-NEXT: .word 0 // 0x0
	; CHECK-NEXT: .word 0 // 0x0

	;; For global variable @val
	;; The section name remains `.rodata.cst32` without hotness prefix because
	;; the variable has external linkage and is not analyzed. The compiler needs
	;; symbolized data access profiles to annotate such global variables' hotness.
	; CHECK: .section .rodata.cst32,"aM",@progbits,32
	; CHECK-NEXT: .globl val

	; Cold function: !16 gives it a function_entry_count of 1. It references three
	; constants, which the FileCheck expectations for @cold_func earlier in this
	; file look for as .LCPI0_0 (double 0.68), .LCPI0_1 (double 0.685) and
	; .LCPI0_2 (the 8-byte tbl2 index vector). %x is accepted but not used.
	define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
	; 0.68 is also passed by @hot_func, which is why its pool entry is expected
	; in a `.hot` section even though this function is cold.
	%2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
	; 0.685 appears only in this function; expected in an `.unlikely` section.
	%num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01)
	; The same index vector is used by @unprofiled_func; expected in a section
	; without a hotness suffix.
	%t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
	%t2 = bitcast <8 x i8> %t1 to <2 x i32>
	; Sum lane 1 of the lookup result with both call results.
	%3 = extractelement <2 x i32> %t2, i32 1
	%sum = add i32 %2, %3
	%ret = add i32 %sum, %num
	ret i32 %ret
	}

	; NEON table-lookup intrinsic taking two <16 x i8> table operands; called with
	; a constant index vector by @cold_func and @unprofiled_func.
	declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
	; External variadic callee; @cold_func and @hot_func pass it double constants.
	declare i32 @func_taking_arbitrary_param(...)

	; Function with no !prof attachment. Per the FileCheck expectations for
	; @unprofiled_func earlier in this file, its three constants are emitted as
	; .LCPI1_0 (tbl2 index vector, .rodata.cst8), .LCPI1_1 (<2, 3, 5, 7>,
	; .rodata.cst16) and .LCPI1_2 (<442, 100, 0, 0>, .rodata.cst16.hot.).
	define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
	; Same index vector as the tbl2 call in @cold_func.
	%t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
	%t2 = bitcast <8 x i8> %t1 to <4 x i16>
	%t3 = zext <4 x i16> %t2 to <4 x i32>
	; <2, 3, 5, 7> is referenced only by this function.
	%t4 = add <4 x i32> %t3, <i32 2, i32 3, i32 5, i32 7>
	; <442, 100, 0, 0> is also used as a compare operand in @hot_func.
	%cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t4
	ret <4 x i1> %cmp
	}

	; Hot function: !17 gives it a function_entry_count of 100000. Per the
	; FileCheck expectations for @hot_func earlier in this file, all of its
	; constants are emitted in `.hot` sections: .LCPI2_0 (double 0.68), .LCPI2_1
	; (<0, 100, 0, 442>) and .LCPI2_2 (<442, 100, 0, 0>). The i32 parameter %0 is
	; not used in the body.
	define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
	; Same double constant as the first call in @cold_func.
	%2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
	; <0, 100, 0, 442> is referenced only by this function.
	%b = add <4 x i32> <i32 0, i32 100, i32 0, i32 442>, %a
	; Same vector constant as the compare in @unprofiled_func.
	%c = icmp ule <4 x i32> %b, <i32 442, i32 100, i32 0, i32 0>
	ret <4 x i1> %c
	}

	; 256-bit constant with no explicit linkage keyword, loaded in @main. The
	; FileCheck expectations earlier in this file require it to be emitted in plain
	; `.rodata.cst32`, with no hotness marker in the section name.
	@val = unnamed_addr constant i256 1

	; Driver: calls @hot_func in a loop and then @cold_func once. It carries !16
	; (function_entry_count 1), the same attachment as @cold_func.
	;
	; Both call sites use the callees' defined signatures: @cold_func is called as
	; returning i32, and @hot_func is called as <4 x i1> (i32, <4 x i32>) with its
	; vector operand produced by @vector_func. Calling through a function type
	; that differs from the definition would be undefined behavior.
	define i32 @main(i32 %0, ptr %1) !prof !16 {
	br label %7

	5: ; preds = %7
	; Loop exit: reached once, when the counter hits 100000.
	%x = call double @double_func()
	%a = call <16 x i8> @vector_func_16i8()
	%b = call <16 x i8> @vector_func_16i8()
	; The result is named (not left implicit) so value numbering is unchanged.
	%cold_ret = call i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b)
	ret i32 0

	7: ; preds = %7, %2
	; %8 is the iteration counter; it is 0 on entry from the entry block %2.
	%8 = phi i32 [ 0, %2 ], [ %10, %7 ]
	; Load @val so that the global is referenced by code.
	%seed_val = load i256, ptr @val
	%9 = call i32 @seed(i256 %seed_val)
	%hot_vec = call <4 x i32> @vector_func()
	%hot_ret = call <4 x i1> @hot_func(i32 %9, <4 x i32> %hot_vec)
	%10 = add i32 %8, 1
	; !18 weights the exit edge (to %5) 1 against 99999 for the back edge (to %7).
	%11 = icmp eq i32 %10, 100000
	br i1 %11, label %5, label %7, !prof !18
	}

	; External functions declared for use by @main.
	declare i32 @seed(i256)
	declare double @double_func()
	declare <4 x i32> @vector_func()
	declare <16 x i8> @vector_func_16i8()

	; Attach the profile summary (!1) to the module.
	!llvm.module.flags = !{!1}

	; ProfileSummary module flag; !3 through !12 are its key/value fields.
	!1 = !{i32 1, !"ProfileSummary", !2}
	!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
	!3 = !{!"ProfileFormat", !"InstrProf"}
	!4 = !{!"TotalCount", i64 1460617}
	!5 = !{!"MaxCount", i64 849536}
	!6 = !{!"MaxInternalCount", i64 32769}
	!7 = !{!"MaxFunctionCount", i64 849536}
	!8 = !{!"NumCounts", i64 23784}
	!9 = !{!"NumFunctions", i64 3301}
	!10 = !{!"IsPartialProfile", i64 0}
	!11 = !{!"PartialProfileRatio", double 0.000000e+00}
	; NOTE(review): each DetailedSummary entry is presumably a
	; (cutoff, minimum count, number of counts) triple -- confirm against the LLVM
	; profile summary documentation.
	!12 = !{!"DetailedSummary", !13}
	!13 = !{!14, !15}
	!14 = !{i32 990000, i64 166, i32 73}
	!15 = !{i32 999999, i64 3, i32 1463}
	; Entry count attached to @cold_func and @main.
	!16 = !{!"function_entry_count", i64 1}
	; Entry count attached to @hot_func.
	!17 = !{!"function_entry_count", i64 100000}
	; Weights for the conditional branch at the end of the loop in @main.
	!18 = !{!"branch_weights", i32 1, i32 99999}