| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %} |
| |
| ; Verify that we correctly emit code for i8 ldg/ldu. We do not expose 8-bit |
| ; registers in the backend, so these loads need special handling. |
| |
| ; Module-level target settings. Per the LLVM data layout syntax: "e" selects |
| ; little-endian, "i64:64" gives i64 a 64-bit ABI alignment, "v16:16" and |
| ; "v32:32" set the alignment of 16- and 32-bit vectors, and "n16:32:64" lists |
| ; the native integer widths. The RUN lines also pass -mtriple=nvptx64 on the |
| ; llc command line. |
| target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" |
| target triple = "nvptx64-unknown-unknown" |
| |
| ; Scalar zero-extension case: load one i8 through the noalias/readonly kernel |
| ; argument %data, zext it to i32, and store the result through %res. |
| ; The expected PTX below is a single ld.global.nc.b8 whose destination is a |
| ; 32-bit register (%r1) that is stored directly, with no separate cvt. |
| define ptx_kernel void @ex_zext(ptr noalias readonly %data, ptr %res) { |
| ; CHECK-LABEL: ex_zext( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<2>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b64 %rd1, [ex_zext_param_0]; |
| ; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [ex_zext_param_1]; |
| ; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3; |
| ; CHECK-NEXT: ld.global.nc.b8 %r1, [%rd2]; |
| ; CHECK-NEXT: st.global.b32 [%rd4], %r1; |
| ; CHECK-NEXT: ret; |
| entry: |
| %val = load i8, ptr %data |
| %valext = zext i8 %val to i32 |
| store i32 %valext, ptr %res |
| ret void |
| } |
| |
| ; Scalar sign-extension case: load one i8 through the noalias/readonly kernel |
| ; argument %data, sext it to i32, and store the result through %res. |
| ; The expected PTX below uses the signed load type (ld.global.nc.s8) into a |
| ; 32-bit register (%r1) that is stored directly, with no separate cvt. |
| define ptx_kernel void @ex_sext(ptr noalias readonly %data, ptr %res) { |
| ; CHECK-LABEL: ex_sext( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<2>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b64 %rd1, [ex_sext_param_0]; |
| ; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [ex_sext_param_1]; |
| ; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3; |
| ; CHECK-NEXT: ld.global.nc.s8 %r1, [%rd2]; |
| ; CHECK-NEXT: st.global.b32 [%rd4], %r1; |
| ; CHECK-NEXT: ret; |
| entry: |
| %val = load i8, ptr %data |
| %valext = sext i8 %val to i32 |
| store i32 %valext, ptr %res |
| ret void |
| } |
| |
| ; Vector zero-extension case: load a <2 x i8> through the noalias/readonly |
| ; kernel argument %data, zext it to <2 x i32>, and store it through %res. |
| ; The expected PTX below loads both bytes with one ld.global.nc.v2.b8 into |
| ; 16-bit registers (%rs1, %rs2), widens each with cvt.u32.u16, and writes the |
| ; pair back with a single st.global.v2.b32. |
| define ptx_kernel void @ex_zext_v2(ptr noalias readonly %data, ptr %res) { |
| ; CHECK-LABEL: ex_zext_v2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b64 %rd1, [ex_zext_v2_param_0]; |
| ; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [ex_zext_v2_param_1]; |
| ; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3; |
| ; CHECK-NEXT: ld.global.nc.v2.b8 {%rs1, %rs2}, [%rd2]; |
| ; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; |
| ; CHECK-NEXT: cvt.u32.u16 %r2, %rs1; |
| ; CHECK-NEXT: st.global.v2.b32 [%rd4], {%r2, %r1}; |
| ; CHECK-NEXT: ret; |
| entry: |
| %val = load <2 x i8>, ptr %data |
| %valext = zext <2 x i8> %val to <2 x i32> |
| store <2 x i32> %valext, ptr %res |
| ret void |
| } |
| |
| ; Vector sign-extension case: load a <2 x i8> through the noalias/readonly |
| ; kernel argument %data, sext it to <2 x i32>, and store it through %res. |
| ; The expected PTX below loads both bytes with one ld.global.nc.v2.b8 into |
| ; 16-bit registers (%rs1, %rs2); each element is then widened with |
| ; cvt.u32.u16 followed by cvt.s32.s8 before a single st.global.v2.b32. |
| define ptx_kernel void @ex_sext_v2(ptr noalias readonly %data, ptr %res) { |
| ; CHECK-LABEL: ex_sext_v2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b64 %rd1, [ex_sext_v2_param_0]; |
| ; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [ex_sext_v2_param_1]; |
| ; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3; |
| ; CHECK-NEXT: ld.global.nc.v2.b8 {%rs1, %rs2}, [%rd2]; |
| ; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; |
| ; CHECK-NEXT: cvt.s32.s8 %r2, %r1; |
| ; CHECK-NEXT: cvt.u32.u16 %r3, %rs1; |
| ; CHECK-NEXT: cvt.s32.s8 %r4, %r3; |
| ; CHECK-NEXT: st.global.v2.b32 [%rd4], {%r4, %r2}; |
| ; CHECK-NEXT: ret; |
| entry: |
| %val = load <2 x i8>, ptr %data |
| %valext = sext <2 x i8> %val to <2 x i32> |
| store <2 x i32> %valext, ptr %res |
| ret void |
| } |
| |