|  | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s | 
|  | ; RUN: FileCheck -check-prefix=REMARK %s < %t | 
|  |  | 
|  | ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     TotalSGPRs: 28 | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs: 9 | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     AGPRs: 43 | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     ScratchSize [bytes/lane]: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     Dynamic Stack: False | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     Occupancy [waves/SIMD]: 5 | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     SGPRs Spill: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs Spill: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:27:0:     LDS Size [bytes/block]: 512 | 
|  |  | 
|  | ; REMARK-LABEL: --- !Analysis | 
|  | ; REMARK: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            FunctionName | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          'Function Name: ' | 
|  | ; REMARK-NEXT:   - FunctionName:      test_kernel | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            NumSGPR | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    TotalSGPRs: ' | 
|  | ; REMARK-NEXT:   - NumSGPR:         '28' | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            NumVGPR | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    VGPRs: ' | 
|  | ; REMARK-NEXT:   - NumVGPR:         '9' | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            NumAGPR | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    AGPRs: ' | 
|  | ; REMARK-NEXT:   - NumAGPR:         '43' | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            ScratchSize | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    ScratchSize [bytes/lane]: ' | 
|  | ; REMARK-NEXT:   - ScratchSize:     '0' | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            DynamicStack | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    Dynamic Stack: ' | 
|  | ; REMARK-NEXT:   - DynamicStack:    'False' | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            Occupancy | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    Occupancy [waves/SIMD]: ' | 
|  | ; REMARK-NEXT:   - Occupancy:       '5' | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            SGPRSpill | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    SGPRs Spill: ' | 
|  | ; REMARK-NEXT:   - SGPRSpill:       '0' | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            VGPRSpill | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    VGPRs Spill: ' | 
|  | ; REMARK-NEXT:   - VGPRSpill:       '0' | 
|  | ; REMARK-NEXT: ... | 
|  | ; REMARK-NEXT: --- !Analysis | 
|  | ; REMARK-NEXT: Pass:            kernel-resource-usage | 
|  | ; REMARK-NEXT: Name:            BytesLDS | 
|  | ; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 } | 
|  | ; REMARK-NEXT: Function:        test_kernel | 
|  | ; REMARK-NEXT: Args: | 
|  | ; REMARK-NEXT:   - String:          '    LDS Size [bytes/block]: ' | 
|  | ; REMARK-NEXT:   - BytesLDS:        '512' | 
|  | ; REMARK-NEXT: ... | 
|  |  | 
|  | ; Address-space-3 array of 128 i32 values (128 * 4 = 512 bytes). test_kernel | 
|  | ; references it, and 512 is the LDS size expected for that kernel above. | 
|  | @lds = internal unnamed_addr addrspace(3) global [128 x i32] poison, align 4 | 
|  |  | 
|  | ; Kernel whose register usage is driven purely by inline-asm clobbers. The | 
|  | ; expectations at the top of the file pin the values reported for it: | 
|  | ; 28 SGPRs, 9 VGPRs (v8 clobbered), 43 AGPRs (a42 clobbered), 512 bytes of LDS. | 
|  | define amdgpu_kernel void @test_kernel() !dbg !3 { | 
|  | ; Clobber one register of each class: v8, s23 and a42. | 
|  | call void asm sideeffect "; clobber v8", "~{v8}"() | 
|  | call void asm sideeffect "; clobber s23", "~{s23}"() | 
|  | call void asm sideeffect "; clobber a42", "~{a42}"() | 
|  | ; Pass the address of @lds as a "v"-constrained asm operand so the global is used. | 
|  | call void asm sideeffect "; use $0", "v"(ptr addrspace(3) @lds) | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; STDERR-NOT: test_func | 
|  | ; Function without the amdgpu_kernel calling convention that clobbers v17, s11 | 
|  | ; and a9. The negative check directly above requires that its name never | 
|  | ; appears in the emitted remarks. | 
|  | define void @test_func() !dbg !6 { | 
|  | call void asm sideeffect "; clobber v17", "~{v17}"() | 
|  | call void asm sideeffect "; clobber s11", "~{s11}"() | 
|  | call void asm sideeffect "; clobber a9", "~{a9}"() | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; STDERR: remark: foo.cl:8:0: Function Name: empty_kernel | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     TotalSGPRs: 4 | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     AGPRs: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     ScratchSize [bytes/lane]: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     Dynamic Stack: False | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     Occupancy [waves/SIMD]: 10 | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     SGPRs Spill: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs Spill: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:8:0:     LDS Size [bytes/block]: 0 | 
|  | ; Kernel with an empty body. The checks directly above expect 4 SGPRs, zero | 
|  | ; VGPRs/AGPRs/scratch/LDS, and an occupancy of 10 waves per SIMD. | 
|  | define amdgpu_kernel void @empty_kernel() !dbg !7 { | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; STDERR-NOT: empty_func | 
|  | ; Empty function without the amdgpu_kernel calling convention. The negative | 
|  | ; check directly above requires that its name never appears in the remarks. | 
|  | define void @empty_func() !dbg !8 { | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     TotalSGPRs: test_indirect_call.numbered_sgpr+6 | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs: test_indirect_call.num_vgpr | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     AGPRs: test_indirect_call.num_agpr | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, max(test_indirect_call.numbered_sgpr+extrasgprs(test_indirect_call.uses_vcc, test_indirect_call.uses_flat_scratch, 1), 1, 0), max(totalnumvgprs(test_indirect_call.num_agpr, test_indirect_call.num_vgpr), 1, 0)) | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0 | 
|  | ; External constant in address space 4 holding a pointer. The indirect-call | 
|  | ; kernels below load it and call through the loaded value. | 
|  | @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 | 
|  |  | 
|  | ; Kernel that makes an indirect call. The checks above expect the SGPR, VGPR, | 
|  | ; AGPR and occupancy values to be printed as symbolic expressions rather than | 
|  | ; literals, a scratch size of 0, and "Dynamic Stack: True". | 
|  | define amdgpu_kernel void @test_indirect_call() !dbg !9 { | 
|  | ; Load the callee from @gv.fptr0 and call it. | 
|  | %fptr = load ptr, ptr addrspace(4) @gv.fptr0 | 
|  | call void %fptr() | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     TotalSGPRs: test_indirect_w_static_stack.numbered_sgpr+6 | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs: test_indirect_w_static_stack.num_vgpr | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     AGPRs: test_indirect_w_static_stack.num_agpr | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144 | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, max(test_indirect_w_static_stack.numbered_sgpr+extrasgprs(test_indirect_w_static_stack.uses_vcc, test_indirect_w_static_stack.uses_flat_scratch, 1), 1, 0), max(totalnumvgprs(test_indirect_w_static_stack.num_agpr, test_indirect_w_static_stack.num_vgpr), 1, 0)) | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0 | 
|  | ; STDERR-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0 | 
|  |  | 
|  | ; memset intrinsic for an address-space-5 destination with an i64 length. | 
|  | ; The destination is written, not read, so it is marked writeonly. | 
|  | declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg) | 
|  |  | 
|  | ; Kernel that combines a fixed-size stack object with an indirect call. The | 
|  | ; checks above expect a scratch size of 144 bytes per lane together with | 
|  | ; "Dynamic Stack: True" and symbolic register counts. | 
|  | define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 { | 
|  | ; Stack object in address space 5; the memset zeroes its first 40 bytes. | 
|  | %alloca = alloca <10 x i64>, align 16, addrspace(5) | 
|  | call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false) | 
|  | ; Load the callee from @gv.fptr0 and call it. | 
|  | %fptr = load ptr, ptr addrspace(4) @gv.fptr0 | 
|  | call void %fptr() | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Named metadata: the compile unit, and a single module-flags list carrying | 
|  | ; both the debug-info version (!2) and the code object version (!11). | 
|  | !llvm.dbg.cu = !{!0} | 
|  | !llvm.module.flags = !{!2, !11} | 
|  |  | 
|  | ; Debug-info metadata. Every function gets its own DISubprogram in foo.cl; the | 
|  | ; scopeLine values of the kernels (27, 8, 64, 74) are the line numbers that | 
|  | ; appear in the expected remark locations. | 
|  | !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) | 
|  | !1 = !DIFile(filename: "foo.cl", directory: "/tmp") | 
|  | ; Module flag: "Debug Info Version" = 3. | 
|  | !2 = !{i32 2, !"Debug Info Version", i32 3} | 
|  | !3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) | 
|  | ; Subroutine type shared by all subprograms: void return, no parameters. | 
|  | !4 = !DISubroutineType(types: !5) | 
|  | !5 = !{null} | 
|  | !6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) | 
|  | !7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) | 
|  | !8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) | 
|  | !9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) | 
|  | !10 = distinct !DISubprogram(name: "test_indirect_w_static_stack", scope: !1, file: !1, type: !4, scopeLine: 74, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) | 
|  | ; Module flag: "amdhsa_code_object_version" = 500. | 
|  | !11 = !{i32 1, !"amdhsa_code_object_version", i32 500} |