| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | FileCheck %s --check-prefix=CHECK-PTX |
| ; RUN: %if ptxas-isa-6.4 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | %ptxas-verify %} |
| |
| ; Aggregate types used by the struct-passing tests in this file. |
| %struct.S1 = type { i32, i8, i64 } |
| %struct.S2 = type { i64, i64 } |
| |
| ; Constant %struct.S1 instance; @bar reads it at byte offsets 0, 4, 5 and 8. |
| @__const.bar.s1 = private unnamed_addr constant %struct.S1 { i32 1, i8 1, i64 1 }, align 8 |
| ; Constant %struct.S2 instance; @qux copies all 16 bytes into a local with memcpy. |
| @__const.qux.s = private unnamed_addr constant %struct.S2 { i64 1, i64 1 }, align 8 |
| |
| ; variadics1(first, ...): reads three i32 values, one i64 and two doubles from the |
| ; list initialised by llvm.va_start and folds them, together with %first, into the |
| ; i32 result. Each i32 read advances the cursor by 4 bytes; the i64 and double reads |
| ; first round the cursor up to a multiple of 8 (add 7, then llvm.ptrmask with -8) |
| ; and then advance it by 8. |
| ; |
| ; In the expected PTX the variadic area arrives as a 64-bit pointer in |
| ; variadics1_param_1 and every value is read with a plain ld (no state-space |
| ; qualifier) relative to it. The constant 19 is the 12 bytes consumed by the three |
| ; i32 reads plus the 7 of the first align-up; the later 15s are 8 (previous slot) |
| ; plus 7. |
| define dso_local i32 @variadics1(i32 noundef %first, ...) { |
| ; CHECK-PTX-LABEL: variadics1( |
| ; CHECK-PTX: { |
| ; CHECK-PTX-NEXT: .reg .b32 %r<11>; |
| ; CHECK-PTX-NEXT: .reg .b64 %rd<17>; |
| ; CHECK-PTX-EMPTY: |
| ; CHECK-PTX-NEXT: // %bb.0: // %entry |
| ; CHECK-PTX-NEXT: ld.param.b32 %r1, [variadics1_param_0]; |
| ; CHECK-PTX-NEXT: ld.param.b64 %rd1, [variadics1_param_1]; |
| ; CHECK-PTX-NEXT: ld.b32 %r2, [%rd1]; |
| ; CHECK-PTX-NEXT: add.s32 %r3, %r1, %r2; |
| ; CHECK-PTX-NEXT: ld.b32 %r4, [%rd1+4]; |
| ; CHECK-PTX-NEXT: add.s32 %r5, %r3, %r4; |
| ; CHECK-PTX-NEXT: ld.b32 %r6, [%rd1+8]; |
| ; CHECK-PTX-NEXT: add.s32 %r7, %r5, %r6; |
| ; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 19; |
| ; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8; |
| ; CHECK-PTX-NEXT: ld.b64 %rd4, [%rd3]; |
| ; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r7; |
| ; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4; |
| ; CHECK-PTX-NEXT: cvt.u32.u64 %r8, %rd6; |
| ; CHECK-PTX-NEXT: add.s64 %rd7, %rd3, 15; |
| ; CHECK-PTX-NEXT: and.b64 %rd8, %rd7, -8; |
| ; CHECK-PTX-NEXT: ld.b64 %rd9, [%rd8]; |
| ; CHECK-PTX-NEXT: cvt.rn.f64.s32 %rd10, %r8; |
| ; CHECK-PTX-NEXT: add.rn.f64 %rd11, %rd10, %rd9; |
| ; CHECK-PTX-NEXT: cvt.rzi.s32.f64 %r9, %rd11; |
| ; CHECK-PTX-NEXT: add.s64 %rd12, %rd8, 15; |
| ; CHECK-PTX-NEXT: and.b64 %rd13, %rd12, -8; |
| ; CHECK-PTX-NEXT: ld.b64 %rd14, [%rd13]; |
| ; CHECK-PTX-NEXT: cvt.rn.f64.s32 %rd15, %r9; |
| ; CHECK-PTX-NEXT: add.rn.f64 %rd16, %rd15, %rd14; |
| ; CHECK-PTX-NEXT: cvt.rzi.s32.f64 %r10, %rd16; |
| ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r10; |
| ; CHECK-PTX-NEXT: ret; |
| entry: |
| %vlist = alloca ptr, align 8 |
| call void @llvm.va_start.p0(ptr %vlist) |
| ; First i32: read at the cursor, cursor += 4 (no realignment). |
| %argp.cur = load ptr, ptr %vlist, align 8 |
| %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 4 |
| store ptr %argp.next, ptr %vlist, align 8 |
| %0 = load i32, ptr %argp.cur, align 4 |
| %add = add nsw i32 %first, %0 |
| ; Second i32. |
| %argp.cur1 = load ptr, ptr %vlist, align 8 |
| %argp.next2 = getelementptr inbounds i8, ptr %argp.cur1, i64 4 |
| store ptr %argp.next2, ptr %vlist, align 8 |
| %1 = load i32, ptr %argp.cur1, align 4 |
| %add3 = add nsw i32 %add, %1 |
| ; Third i32. |
| %argp.cur4 = load ptr, ptr %vlist, align 8 |
| %argp.next5 = getelementptr inbounds i8, ptr %argp.cur4, i64 4 |
| store ptr %argp.next5, ptr %vlist, align 8 |
| %2 = load i32, ptr %argp.cur4, align 4 |
| %add6 = add nsw i32 %add3, %2 |
| ; i64: round the cursor up to 8, read, cursor += 8. The running sum is widened |
| ; to i64 for the add and truncated back to i32 afterwards. |
| %argp.cur7 = load ptr, ptr %vlist, align 8 |
| %3 = getelementptr inbounds i8, ptr %argp.cur7, i32 7 |
| %argp.cur7.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %3, i64 -8) |
| %argp.next8 = getelementptr inbounds i8, ptr %argp.cur7.aligned, i64 8 |
| store ptr %argp.next8, ptr %vlist, align 8 |
| %4 = load i64, ptr %argp.cur7.aligned, align 8 |
| %conv = sext i32 %add6 to i64 |
| %add9 = add nsw i64 %conv, %4 |
| %conv10 = trunc i64 %add9 to i32 |
| ; First double: same align-up-to-8 pattern; the sum goes i32 -> double -> i32. |
| %argp.cur11 = load ptr, ptr %vlist, align 8 |
| %5 = getelementptr inbounds i8, ptr %argp.cur11, i32 7 |
| %argp.cur11.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %5, i64 -8) |
| %argp.next12 = getelementptr inbounds i8, ptr %argp.cur11.aligned, i64 8 |
| store ptr %argp.next12, ptr %vlist, align 8 |
| %6 = load double, ptr %argp.cur11.aligned, align 8 |
| %conv13 = sitofp i32 %conv10 to double |
| %add14 = fadd double %conv13, %6 |
| %conv15 = fptosi double %add14 to i32 |
| ; Second double. |
| %argp.cur16 = load ptr, ptr %vlist, align 8 |
| %7 = getelementptr inbounds i8, ptr %argp.cur16, i32 7 |
| %argp.cur16.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %7, i64 -8) |
| %argp.next17 = getelementptr inbounds i8, ptr %argp.cur16.aligned, i64 8 |
| store ptr %argp.next17, ptr %vlist, align 8 |
| %8 = load double, ptr %argp.cur16.aligned, align 8 |
| %conv18 = sitofp i32 %conv15 to double |
| %add19 = fadd double %conv18, %8 |
| %conv20 = fptosi double %add19 to i32 |
| call void @llvm.va_end.p0(ptr %vlist) |
| ret i32 %conv20 |
| } |
| |
| ; Intrinsics used by the variadic callees in this file: va_start/va_end bracket |
| ; the argument walk, and ptrmask rounds the argument cursor to the slot alignment. |
| declare void @llvm.va_start.p0(ptr) |
| |
| declare ptr @llvm.ptrmask.p0.i64(ptr, i64) |
| |
| declare void @llvm.va_end.p0(ptr) |
| |
| ; foo(): caller side for variadics1. Passes the fixed i32 1 plus six variadic |
| ; values: three i32 (two of them produced by sext of i8/i16 constants), one i64, |
| ; and two doubles (one produced by fpext of a float constant). |
| ; |
| ; The expected PTX builds the variadic buffer in a 40-byte, 8-aligned local depot |
| ; and passes its address (%SP + 0) as param1: |
| ;   offset 0  - 4294967297 (0x0000000100000001): the first two i32 1s in one 64-bit store |
| ;   offset 8  - third i32 |
| ;   offset 16 - i64 |
| ;   offset 24 and 32 - 4607182418800017408 (0x3FF0000000000000), the bits of double 1.0 |
| define dso_local i32 @foo() { |
| ; CHECK-PTX-LABEL: foo( |
| ; CHECK-PTX: { |
| ; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot1[40]; |
| ; CHECK-PTX-NEXT: .reg .b64 %SP; |
| ; CHECK-PTX-NEXT: .reg .b64 %SPL; |
| ; CHECK-PTX-NEXT: .reg .b32 %r<2>; |
| ; CHECK-PTX-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-PTX-EMPTY: |
| ; CHECK-PTX-NEXT: // %bb.0: // %entry |
| ; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot1; |
| ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; |
| ; CHECK-PTX-NEXT: st.b64 [%SP], 4294967297; |
| ; CHECK-PTX-NEXT: st.b32 [%SP+8], 1; |
| ; CHECK-PTX-NEXT: st.b64 [%SP+16], 1; |
| ; CHECK-PTX-NEXT: st.b64 [%SP+24], 4607182418800017408; |
| ; CHECK-PTX-NEXT: st.b64 [%SP+32], 4607182418800017408; |
| ; CHECK-PTX-NEXT: { // callseq 0, 0 |
| ; CHECK-PTX-NEXT: .param .b32 param0; |
| ; CHECK-PTX-NEXT: .param .b64 param1; |
| ; CHECK-PTX-NEXT: .param .b32 retval0; |
| ; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0; |
| ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1; |
| ; CHECK-PTX-NEXT: st.param.b32 [param0], 1; |
| ; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1); |
| ; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; |
| ; CHECK-PTX-NEXT: } // callseq 0 |
| ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-PTX-NEXT: ret; |
| entry: |
| ; Conversions of constants; the expected PTX contains only immediates for them. |
| %conv = sext i8 1 to i32 |
| %conv1 = sext i16 1 to i32 |
| %conv2 = fpext float 1.000000e+00 to double |
| %call = call i32 (i32, ...) @variadics1(i32 noundef 1, i32 noundef %conv, i32 noundef %conv1, i32 noundef 1, i64 noundef 1, double noundef %conv2, double noundef 1.000000e+00) |
| ret i32 %call |
| } |
| |
| ; variadics2(first, ...): consumes one 16-byte, 8-aligned variadic slot and reads |
| ; it piecewise: an i32 at offset 0, an i8 at offset 4, three bytes at offsets 5..7 |
| ; (memcpy'd into the local %s1.sroa.3 and never read again), and an i64 at offset 8. |
| ; The result is first + i32 + sext(i8) + i64, truncated to i32. |
| ; NOTE(review): the s1.sroa.* value names suggest this is a %struct.S1 argument |
| ; after SROA; the slot size and offsets are consistent with that, but the function |
| ; itself never names the type. |
| ; |
| ; The expected PTX keeps the 3-byte copy as three byte loads/stores into a 3-byte |
| ; local depot, and stores the 64-bit sum register directly with st.param.b32. |
| define dso_local i32 @variadics2(i32 noundef %first, ...) { |
| ; CHECK-PTX-LABEL: variadics2( |
| ; CHECK-PTX: { |
| ; CHECK-PTX-NEXT: .local .align 1 .b8 __local_depot2[3]; |
| ; CHECK-PTX-NEXT: .reg .b64 %SP; |
| ; CHECK-PTX-NEXT: .reg .b64 %SPL; |
| ; CHECK-PTX-NEXT: .reg .b16 %rs<4>; |
| ; CHECK-PTX-NEXT: .reg .b32 %r<6>; |
| ; CHECK-PTX-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-PTX-EMPTY: |
| ; CHECK-PTX-NEXT: // %bb.0: // %entry |
| ; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot2; |
| ; CHECK-PTX-NEXT: ld.param.b32 %r1, [variadics2_param_0]; |
| ; CHECK-PTX-NEXT: ld.param.b64 %rd1, [variadics2_param_1]; |
| ; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0; |
| ; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 7; |
| ; CHECK-PTX-NEXT: and.b64 %rd4, %rd3, -8; |
| ; CHECK-PTX-NEXT: ld.b32 %r2, [%rd4]; |
| ; CHECK-PTX-NEXT: ld.s8 %r3, [%rd4+4]; |
| ; CHECK-PTX-NEXT: ld.b8 %rs1, [%rd4+7]; |
| ; CHECK-PTX-NEXT: st.local.b8 [%rd2+2], %rs1; |
| ; CHECK-PTX-NEXT: ld.b8 %rs2, [%rd4+6]; |
| ; CHECK-PTX-NEXT: st.local.b8 [%rd2+1], %rs2; |
| ; CHECK-PTX-NEXT: ld.b8 %rs3, [%rd4+5]; |
| ; CHECK-PTX-NEXT: st.local.b8 [%rd2], %rs3; |
| ; CHECK-PTX-NEXT: ld.b64 %rd5, [%rd4+8]; |
| ; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2; |
| ; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3; |
| ; CHECK-PTX-NEXT: cvt.u64.u32 %rd6, %r5; |
| ; CHECK-PTX-NEXT: add.s64 %rd7, %rd6, %rd5; |
| ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %rd7; |
| ; CHECK-PTX-NEXT: ret; |
| entry: |
| %vlist = alloca ptr, align 8 |
| %s1.sroa.3 = alloca [3 x i8], align 1 |
| call void @llvm.va_start.p0(ptr %vlist) |
| ; Round the cursor up to 8 and consume a 16-byte slot. |
| %argp.cur = load ptr, ptr %vlist, align 8 |
| %0 = getelementptr inbounds i8, ptr %argp.cur, i32 7 |
| %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -8) |
| %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 16 |
| store ptr %argp.next, ptr %vlist, align 8 |
| ; Piecewise reads of the slot: offset 0 (i32), 4 (i8), 5..7 (3 bytes), 8 (i64). |
| %s1.sroa.0.0.copyload = load i32, ptr %argp.cur.aligned, align 8 |
| %s1.sroa.2.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 4 |
| %s1.sroa.2.0.copyload = load i8, ptr %s1.sroa.2.0.argp.cur.aligned.sroa_idx, align 4 |
| %s1.sroa.3.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 5 |
| call void @llvm.memcpy.p0.p0.i64(ptr align 1 %s1.sroa.3, ptr align 1 %s1.sroa.3.0.argp.cur.aligned.sroa_idx, i64 3, i1 false) |
| %s1.sroa.31.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 8 |
| %s1.sroa.31.0.copyload = load i64, ptr %s1.sroa.31.0.argp.cur.aligned.sroa_idx, align 8 |
| %add = add nsw i32 %first, %s1.sroa.0.0.copyload |
| %conv = sext i8 %s1.sroa.2.0.copyload to i32 |
| %add1 = add nsw i32 %add, %conv |
| %conv2 = sext i32 %add1 to i64 |
| %add3 = add nsw i64 %conv2, %s1.sroa.31.0.copyload |
| %conv4 = trunc i64 %add3 to i32 |
| call void @llvm.va_end.p0(ptr %vlist) |
| ret i32 %conv4 |
| } |
| |
| ; memcpy intrinsic used for the 3-byte copies in @variadics2/@bar and the 16-byte copy in @qux. |
| declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) |
| |
| ; bar(): caller side for variadics2. Loads the i32 (offset 0), i8 (offset 4) and |
| ; i64 (offset 8) pieces of @__const.bar.s1 and passes them as three separate |
| ; variadic arguments; the 3 bytes at offsets 5..7 are copied into the local |
| ; %s1.sroa.3, which is not passed to the call. |
| ; |
| ; In the expected PTX the 24-byte local depot holds the 3-byte local at offset 0 |
| ; and the variadic buffer at offsets 8..23 (b32 at +8, b8 at +12, b64 at +16), so |
| ; the pointer passed as param1 is %SP + 8. The three scalar loads from the |
| ; constant global appear as immediate 1s in the stores. |
| define dso_local i32 @bar() { |
| ; CHECK-PTX-LABEL: bar( |
| ; CHECK-PTX: { |
| ; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24]; |
| ; CHECK-PTX-NEXT: .reg .b64 %SP; |
| ; CHECK-PTX-NEXT: .reg .b64 %SPL; |
| ; CHECK-PTX-NEXT: .reg .b16 %rs<4>; |
| ; CHECK-PTX-NEXT: .reg .b32 %r<2>; |
| ; CHECK-PTX-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-PTX-EMPTY: |
| ; CHECK-PTX-NEXT: // %bb.0: // %entry |
| ; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot3; |
| ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; |
| ; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 0; |
| ; CHECK-PTX-NEXT: ld.global.nc.b8 %rs1, [__const_$_bar_$_s1+7]; |
| ; CHECK-PTX-NEXT: st.local.b8 [%rd1+2], %rs1; |
| ; CHECK-PTX-NEXT: ld.global.nc.b8 %rs2, [__const_$_bar_$_s1+6]; |
| ; CHECK-PTX-NEXT: st.local.b8 [%rd1+1], %rs2; |
| ; CHECK-PTX-NEXT: ld.global.nc.b8 %rs3, [__const_$_bar_$_s1+5]; |
| ; CHECK-PTX-NEXT: st.local.b8 [%rd1], %rs3; |
| ; CHECK-PTX-NEXT: st.b32 [%SP+8], 1; |
| ; CHECK-PTX-NEXT: st.b8 [%SP+12], 1; |
| ; CHECK-PTX-NEXT: st.b64 [%SP+16], 1; |
| ; CHECK-PTX-NEXT: { // callseq 1, 0 |
| ; CHECK-PTX-NEXT: .param .b32 param0; |
| ; CHECK-PTX-NEXT: .param .b64 param1; |
| ; CHECK-PTX-NEXT: .param .b32 retval0; |
| ; CHECK-PTX-NEXT: add.u64 %rd2, %SP, 8; |
| ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd2; |
| ; CHECK-PTX-NEXT: st.param.b32 [param0], 1; |
| ; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1); |
| ; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; |
| ; CHECK-PTX-NEXT: } // callseq 1 |
| ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-PTX-NEXT: ret; |
| entry: |
| %s1.sroa.3 = alloca [3 x i8], align 1 |
| ; Piecewise reads of the constant: offsets 0 (i32), 4 (i8), 5..7 (3 bytes), 8 (i64). |
| %s1.sroa.0.0.copyload = load i32, ptr @__const.bar.s1, align 8 |
| %s1.sroa.2.0.copyload = load i8, ptr getelementptr inbounds (i8, ptr @__const.bar.s1, i64 4), align 4 |
| call void @llvm.memcpy.p0.p0.i64(ptr align 1 %s1.sroa.3, ptr align 1 getelementptr inbounds (i8, ptr @__const.bar.s1, i64 5), i64 3, i1 false) |
| %s1.sroa.31.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const.bar.s1, i64 8), align 8 |
| %call = call i32 (i32, ...) @variadics2(i32 noundef 1, i32 %s1.sroa.0.0.copyload, i8 %s1.sroa.2.0.copyload, i64 %s1.sroa.31.0.copyload) |
| ret i32 %call |
| } |
| |
| ; variadics3(first, ...): reads a single <4 x i32> variadic argument from a |
| ; 16-byte-aligned slot (add 15, then llvm.ptrmask with -16) and returns the sum of |
| ; its four lanes. %first is not used. |
| ; |
| ; The expected PTX performs the read as one ld.v4.b32 and never loads |
| ; variadics3_param_0. |
| define dso_local i32 @variadics3(i32 noundef %first, ...) { |
| ; CHECK-PTX-LABEL: variadics3( |
| ; CHECK-PTX: { |
| ; CHECK-PTX-NEXT: .reg .b32 %r<8>; |
| ; CHECK-PTX-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-PTX-EMPTY: |
| ; CHECK-PTX-NEXT: // %bb.0: // %entry |
| ; CHECK-PTX-NEXT: ld.param.b64 %rd1, [variadics3_param_1]; |
| ; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 15; |
| ; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -16; |
| ; CHECK-PTX-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd3]; |
| ; CHECK-PTX-NEXT: add.s32 %r5, %r1, %r2; |
| ; CHECK-PTX-NEXT: add.s32 %r6, %r5, %r3; |
| ; CHECK-PTX-NEXT: add.s32 %r7, %r6, %r4; |
| ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r7; |
| ; CHECK-PTX-NEXT: ret; |
| entry: |
| %vlist = alloca ptr, align 8 |
| call void @llvm.va_start.p0(ptr %vlist) |
| ; Round the cursor up to 16 and consume one 16-byte vector slot. |
| %argp.cur = load ptr, ptr %vlist, align 8 |
| %0 = getelementptr inbounds i8, ptr %argp.cur, i32 15 |
| %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -16) |
| %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 16 |
| store ptr %argp.next, ptr %vlist, align 8 |
| %1 = load <4 x i32>, ptr %argp.cur.aligned, align 16 |
| call void @llvm.va_end.p0(ptr %vlist) |
| ; Horizontal sum of the four lanes. |
| %2 = extractelement <4 x i32> %1, i64 0 |
| %3 = extractelement <4 x i32> %1, i64 1 |
| %add = add nsw i32 %2, %3 |
| %4 = extractelement <4 x i32> %1, i64 2 |
| %add1 = add nsw i32 %add, %4 |
| %5 = extractelement <4 x i32> %1, i64 3 |
| %add2 = add nsw i32 %add1, %5 |
| ret i32 %add2 |
| } |
| |
| ; baz(): caller side for variadics3. Passes the fixed i32 1 and one constant |
| ; <4 x i32> of all ones as the only variadic argument. |
| ; |
| ; The expected PTX uses a 16-byte, 16-aligned local depot as the variadic buffer, |
| ; fills it with a single st.v4.b32 and passes %SP + 0 as param1. |
| define dso_local i32 @baz() { |
| ; CHECK-PTX-LABEL: baz( |
| ; CHECK-PTX: { |
| ; CHECK-PTX-NEXT: .local .align 16 .b8 __local_depot5[16]; |
| ; CHECK-PTX-NEXT: .reg .b64 %SP; |
| ; CHECK-PTX-NEXT: .reg .b64 %SPL; |
| ; CHECK-PTX-NEXT: .reg .b32 %r<2>; |
| ; CHECK-PTX-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-PTX-EMPTY: |
| ; CHECK-PTX-NEXT: // %bb.0: // %entry |
| ; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5; |
| ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; |
| ; CHECK-PTX-NEXT: st.v4.b32 [%SP], {1, 1, 1, 1}; |
| ; CHECK-PTX-NEXT: { // callseq 2, 0 |
| ; CHECK-PTX-NEXT: .param .b32 param0; |
| ; CHECK-PTX-NEXT: .param .b64 param1; |
| ; CHECK-PTX-NEXT: .param .b32 retval0; |
| ; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0; |
| ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1; |
| ; CHECK-PTX-NEXT: st.param.b32 [param0], 1; |
| ; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1); |
| ; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; |
| ; CHECK-PTX-NEXT: } // callseq 2 |
| ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-PTX-NEXT: ret; |
| entry: |
| %call = call i32 (i32, ...) @variadics3(i32 noundef 1, <4 x i32> noundef <i32 1, i32 1, i32 1, i32 1>) |
| ret i32 %call |
| } |
| |
| ; variadics4(first, ...): the fixed parameter is a byval %struct.S2 (align 8). |
| ; Reads one i64 variadic argument from an 8-aligned slot and returns |
| ; first.field0 + first.field1 + that i64, truncated to i32. |
| ; |
| ; The expected PTX reads the two struct fields straight from parameter space |
| ; (ld.param.b64 at variadics4_param_0 and variadics4_param_0+8), takes the variadic |
| ; area pointer from variadics4_param_1, and stores the 64-bit sum register |
| ; directly with st.param.b32. |
| define dso_local i32 @variadics4(ptr noundef byval(%struct.S2) align 8 %first, ...) { |
| ; CHECK-PTX-LABEL: variadics4( |
| ; CHECK-PTX: { |
| ; CHECK-PTX-NEXT: .reg .b64 %rd<9>; |
| ; CHECK-PTX-EMPTY: |
| ; CHECK-PTX-NEXT: // %bb.0: // %entry |
| ; CHECK-PTX-NEXT: ld.param.b64 %rd1, [variadics4_param_1]; |
| ; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7; |
| ; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8; |
| ; CHECK-PTX-NEXT: ld.b64 %rd4, [%rd3]; |
| ; CHECK-PTX-NEXT: ld.param.b64 %rd5, [variadics4_param_0]; |
| ; CHECK-PTX-NEXT: ld.param.b64 %rd6, [variadics4_param_0+8]; |
| ; CHECK-PTX-NEXT: add.s64 %rd7, %rd5, %rd6; |
| ; CHECK-PTX-NEXT: add.s64 %rd8, %rd7, %rd4; |
| ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %rd8; |
| ; CHECK-PTX-NEXT: ret; |
| entry: |
| %vlist = alloca ptr, align 8 |
| call void @llvm.va_start.p0(ptr %vlist) |
| ; Round the cursor up to 8 and consume one i64 slot. |
| %argp.cur = load ptr, ptr %vlist, align 8 |
| %0 = getelementptr inbounds i8, ptr %argp.cur, i32 7 |
| %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -8) |
| %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 8 |
| store ptr %argp.next, ptr %vlist, align 8 |
| %1 = load i64, ptr %argp.cur.aligned, align 8 |
| ; Load both i64 fields of the byval struct. |
| %x1 = getelementptr inbounds %struct.S2, ptr %first, i32 0, i32 0 |
| %2 = load i64, ptr %x1, align 8 |
| %y = getelementptr inbounds %struct.S2, ptr %first, i32 0, i32 1 |
| %3 = load i64, ptr %y, align 8 |
| %add = add nsw i64 %2, %3 |
| %add2 = add nsw i64 %add, %1 |
| %conv = trunc i64 %add2 to i32 |
| call void @llvm.va_end.p0(ptr %vlist) |
| ret i32 %conv |
| } |
| |
| ; qux(): caller side for variadics4. Copies @__const.qux.s into a local |
| ; %struct.S2, passes it byval as the fixed argument and i64 1 as the only variadic |
| ; argument. The call result is unused and the function returns void. |
| ; |
| ; In the expected PTX the 24-byte local depot holds %s at offsets 0..15 and the |
| ; variadic i64 at offset 16, so param1 is %SP + 16. param0 is declared as a |
| ; 16-byte, 8-aligned .param array filled from the local copy. retval0 is declared |
| ; for the call but never loaded. |
| define dso_local void @qux() { |
| ; CHECK-PTX-LABEL: qux( |
| ; CHECK-PTX: { |
| ; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24]; |
| ; CHECK-PTX-NEXT: .reg .b64 %SP; |
| ; CHECK-PTX-NEXT: .reg .b64 %SPL; |
| ; CHECK-PTX-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-PTX-EMPTY: |
| ; CHECK-PTX-NEXT: // %bb.0: // %entry |
| ; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot7; |
| ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; |
| ; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 0; |
| ; CHECK-PTX-NEXT: ld.global.nc.b64 %rd2, [__const_$_qux_$_s+8]; |
| ; CHECK-PTX-NEXT: st.local.b64 [%rd1+8], %rd2; |
| ; CHECK-PTX-NEXT: ld.global.nc.b64 %rd3, [__const_$_qux_$_s]; |
| ; CHECK-PTX-NEXT: st.local.b64 [%rd1], %rd3; |
| ; CHECK-PTX-NEXT: st.b64 [%SP+16], 1; |
| ; CHECK-PTX-NEXT: { // callseq 3, 0 |
| ; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16]; |
| ; CHECK-PTX-NEXT: .param .b64 param1; |
| ; CHECK-PTX-NEXT: .param .b32 retval0; |
| ; CHECK-PTX-NEXT: add.u64 %rd4, %SP, 16; |
| ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4; |
| ; CHECK-PTX-NEXT: ld.local.b64 %rd5, [%rd1+8]; |
| ; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd5; |
| ; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd1]; |
| ; CHECK-PTX-NEXT: st.param.b64 [param0], %rd6; |
| ; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1); |
| ; CHECK-PTX-NEXT: } // callseq 3 |
| ; CHECK-PTX-NEXT: ret; |
| entry: |
| %s = alloca %struct.S2, align 8 |
| ; Initialise the local struct from the constant (16 bytes, 8-aligned on both sides). |
| call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 @__const.qux.s, i64 16, i1 false) |
| %call = call i32 (ptr, ...) @variadics4(ptr noundef byval(%struct.S2) align 8 %s, i64 noundef 1) |
| ret void |
| } |