llvm/test/CodeGen/NVPTX/inline-asm-line-info-per-instruction.ll - llvm-project - Git at Google

 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_75 | FileCheck %s
 ; RUN: %if ptxas-sm_75 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_75 | %ptxas-verify -arch=sm_75 %}

 ;; Test that debug location info is emitted before each inline asm instruction.
 ;; Each add.f32 instruction should have its own .loc directive.

 ;; __global__ void vectorAdd(float *v) {
 ;;     float a, b;
 ;;
 ;;     // Inline PTX to perform two additions using raw strings and scoped with brackets
 ;;     asm volatile(R"({
 ;;
 ;;
 ;;
 ;; add.f32 %0, %2, %3;
 ;;         add.f32 %1, %4, %5;
 ;;     })"
 ;;         : "=f"(a)
 ;;         , "=f"(b)
 ;;         : "f"(v[0])
 ;;         , "f"(v[1])
 ;;         , "f"(v[2])
 ;;         , "f"(v[3])
 ;;     );
 ;;
 ;;     *v = a + b;
 ;; }

 target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64-a:8:8-p6:32:32"
 target triple = "nvptx64-nvidia-cuda"

 ; CHECK: .loc [[FILEID:[0-9]+]] 9 5
 ; CHECK: add.f32
 ; CHECK: .loc [[FILEID]] 10 5
 ; CHECK: add.f32

 define void @_Z9vectorAddPf(ptr noundef %v) #0 !dbg !8 {
 entry:
   %arrayidx = getelementptr inbounds float, ptr %v, i32 0, !dbg !17
   %tmp1 = load float, ptr %arrayidx, align 4, !dbg !17
   %arrayidx3 = getelementptr inbounds float, ptr %v, i32 1, !dbg !19
   %tmp4 = load float, ptr %arrayidx3, align 4, !dbg !19
   %arrayidx6 = getelementptr inbounds float, ptr %v, i32 2, !dbg !20
   %tmp7 = load float, ptr %arrayidx6, align 4, !dbg !20
   %arrayidx9 = getelementptr inbounds float, ptr %v, i32 3, !dbg !21
   %tmp10 = load float, ptr %arrayidx9, align 4, !dbg !21
   %0 = call { float, float } asm sideeffect "{\0A\0A\0A        \0Aadd.f32 $0, $2, $3;\0A        add.f32 $1, $4, $5;\0A    }", "=f,=f,f,f,f,f"(float %tmp1, float %tmp4, float %tmp7, float %tmp10) #1, !dbg !22
   %asmresult = extractvalue { float, float } %0, 0, !dbg !22
   %asmresult11 = extractvalue { float, float } %0, 1, !dbg !22
   %add = fadd float %asmresult, %asmresult11, !dbg !26
   store float %add, ptr %v, align 4, !dbg !26
   ret void, !dbg !27
 }

 attributes #0 = { alwaysinline convergent mustprogress willreturn "nvvm.kernel" "target-cpu"="sm_75" }
 attributes #1 = { convergent nounwind }

 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!6}

 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "lgenfe", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
 !1 = !DIFile(filename: "vectorAdd.cu", directory: "/test")
 !6 = !{i32 2, !"Debug Info Version", i32 3}
 !8 = distinct !DISubprogram(name: "vectorAdd", linkageName: "_Z9vectorAddPf", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !14)
 !9 = !DISubroutineType(types: !10)
 !10 = !{!11, !12}
 !11 = !DIBasicType(tag: DW_TAG_unspecified_type, name: "void")
 !12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64, align: 64)
 !13 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
 !14 = !{}
 !17 = !DILocation(line: 14, column: 15, scope: !18)
 !18 = distinct !DILexicalBlock(scope: !8, file: !1, line: 1, column: 30)
 !19 = !DILocation(line: 15, column: 15, scope: !18)
 !20 = !DILocation(line: 16, column: 15, scope: !18)
 !21 = !DILocation(line: 17, column: 15, scope: !18)
 !22 = !DILocation(line: 5, column: 5, scope: !18)
 !26 = !DILocation(line: 20, column: 5, scope: !18)
 !27 = !DILocation(line: 21, column: 1, scope: !18)
	; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_75 \| FileCheck %s
	; RUN: %if ptxas-sm_75 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_75 \| %ptxas-verify -arch=sm_75 %}

	;; Test that debug location info is emitted before each inline asm instruction.
	;; Each add.f32 instruction should have its own .loc directive.

	;; __global__ void vectorAdd(float *v) {
	;; float a, b;
	;;
	;; // Inline PTX to perform two additions using raw strings and scoped with brackets
	;; asm volatile(R"({
	;;
	;;
	;;
	;; add.f32 %0, %2, %3;
	;; add.f32 %1, %4, %5;
	;; })"
	;; : "=f"(a)
	;; , "=f"(b)
	;; : "f"(v[0])
	;; , "f"(v[1])
	;; , "f"(v[2])
	;; , "f"(v[3])
	;; );
	;;
	;; *v = a + b;
	;; }

	target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64-a:8:8-p6:32:32"
	target triple = "nvptx64-nvidia-cuda"

	; CHECK: .loc [[FILEID:[0-9]+]] 9 5
	; CHECK: add.f32
	; CHECK: .loc [[FILEID]] 10 5
	; CHECK: add.f32

	define void @_Z9vectorAddPf(ptr noundef %v) #0 !dbg !8 {
	entry:
	%arrayidx = getelementptr inbounds float, ptr %v, i32 0, !dbg !17
	%tmp1 = load float, ptr %arrayidx, align 4, !dbg !17
	%arrayidx3 = getelementptr inbounds float, ptr %v, i32 1, !dbg !19
	%tmp4 = load float, ptr %arrayidx3, align 4, !dbg !19
	%arrayidx6 = getelementptr inbounds float, ptr %v, i32 2, !dbg !20
	%tmp7 = load float, ptr %arrayidx6, align 4, !dbg !20
	%arrayidx9 = getelementptr inbounds float, ptr %v, i32 3, !dbg !21
	%tmp10 = load float, ptr %arrayidx9, align 4, !dbg !21
	%0 = call { float, float } asm sideeffect "{\0A\0A\0A \0Aadd.f32 $0, $2, $3;\0A add.f32 $1, $4, $5;\0A }", "=f,=f,f,f,f,f"(float %tmp1, float %tmp4, float %tmp7, float %tmp10) #1, !dbg !22
	%asmresult = extractvalue { float, float } %0, 0, !dbg !22
	%asmresult11 = extractvalue { float, float } %0, 1, !dbg !22
	%add = fadd float %asmresult, %asmresult11, !dbg !26
	store float %add, ptr %v, align 4, !dbg !26
	ret void, !dbg !27
	}

	attributes #0 = { alwaysinline convergent mustprogress willreturn "nvvm.kernel" "target-cpu"="sm_75" }
	attributes #1 = { convergent nounwind }

	!llvm.dbg.cu = !{!0}
	!llvm.module.flags = !{!6}

	!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "lgenfe", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
	!1 = !DIFile(filename: "vectorAdd.cu", directory: "/test")
	!6 = !{i32 2, !"Debug Info Version", i32 3}
	!8 = distinct !DISubprogram(name: "vectorAdd", linkageName: "_Z9vectorAddPf", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !14)
	!9 = !DISubroutineType(types: !10)
	!10 = !{!11, !12}
	!11 = !DIBasicType(tag: DW_TAG_unspecified_type, name: "void")
	!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64, align: 64)
	!13 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
	!14 = !{}
	!17 = !DILocation(line: 14, column: 15, scope: !18)
	!18 = distinct !DILexicalBlock(scope: !8, file: !1, line: 1, column: 30)
	!19 = !DILocation(line: 15, column: 15, scope: !18)
	!20 = !DILocation(line: 16, column: 15, scope: !18)
	!21 = !DILocation(line: 17, column: 15, scope: !18)
	!22 = !DILocation(line: 5, column: 5, scope: !18)
	!26 = !DILocation(line: 20, column: 5, scope: !18)
	!27 = !DILocation(line: 21, column: 1, scope: !18)