; blob: 6148c0756e39378949457441b970133e89016196 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
; Verify that we correctly emit code for i8 ldg/ldu. We do not expose 8-bit
; registers in the backend, so these loads need special handling.
; Module-level data layout and target triple (64-bit NVPTX).
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
; Scalar case, zero-extension: read one i8 through the noalias/readonly
; pointer, widen it to i32 and write it to %res. The assertions below expect
; the byte to arrive via ld.global.nc.b8 straight into a 32-bit register and
; to be stored without any separate conversion instruction.
define ptx_kernel void @ex_zext(ptr noalias readonly %data, ptr %res) {
; CHECK-LABEL: ex_zext(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b64 %rd1, [ex_zext_param_0];
; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
; CHECK-NEXT: ld.param.b64 %rd3, [ex_zext_param_1];
; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3;
; CHECK-NEXT: ld.global.nc.b8 %r1, [%rd2];
; CHECK-NEXT: st.global.b32 [%rd4], %r1;
; CHECK-NEXT: ret;
entry:
  %byte = load i8, ptr %data
  %wide = zext i8 %byte to i32
  store i32 %wide, ptr %res
  ret void
}
; Scalar case, sign-extension: read one i8 through the noalias/readonly
; pointer, sign-extend it to i32 and write it to %res. The assertions below
; expect a single ld.global.nc.s8 into a 32-bit register followed directly by
; the 32-bit store.
define ptx_kernel void @ex_sext(ptr noalias readonly %data, ptr %res) {
; CHECK-LABEL: ex_sext(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b64 %rd1, [ex_sext_param_0];
; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
; CHECK-NEXT: ld.param.b64 %rd3, [ex_sext_param_1];
; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3;
; CHECK-NEXT: ld.global.nc.s8 %r1, [%rd2];
; CHECK-NEXT: st.global.b32 [%rd4], %r1;
; CHECK-NEXT: ret;
entry:
  %byte = load i8, ptr %data
  %wide = sext i8 %byte to i32
  store i32 %wide, ptr %res
  ret void
}
; Two-element vector case, zero-extension: read a <2 x i8> through the
; noalias/readonly pointer, widen each lane to i32 and write the <2 x i32>
; to %res. The assertions below expect one ld.global.nc.v2.b8 into a pair of
; 16-bit registers, a cvt.u32.u16 per lane, and one v2.b32 store.
define ptx_kernel void @ex_zext_v2(ptr noalias readonly %data, ptr %res) {
; CHECK-LABEL: ex_zext_v2(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b64 %rd1, [ex_zext_v2_param_0];
; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
; CHECK-NEXT: ld.param.b64 %rd3, [ex_zext_v2_param_1];
; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3;
; CHECK-NEXT: ld.global.nc.v2.b8 {%rs1, %rs2}, [%rd2];
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
; CHECK-NEXT: st.global.v2.b32 [%rd4], {%r2, %r1};
; CHECK-NEXT: ret;
entry:
  %bytes = load <2 x i8>, ptr %data
  %wide = zext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, ptr %res
  ret void
}
; Two-element vector case, sign-extension: read a <2 x i8> through the
; noalias/readonly pointer, sign-extend each lane to i32 and write the
; <2 x i32> to %res. The assertions below expect one ld.global.nc.v2.b8 into
; a pair of 16-bit registers; each lane is then moved to a 32-bit register
; (cvt.u32.u16) and sign-extended from its low byte (cvt.s32.s8) before the
; single v2.b32 store.
define ptx_kernel void @ex_sext_v2(ptr noalias readonly %data, ptr %res) {
; CHECK-LABEL: ex_sext_v2(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b64 %rd1, [ex_sext_v2_param_0];
; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
; CHECK-NEXT: ld.param.b64 %rd3, [ex_sext_v2_param_1];
; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3;
; CHECK-NEXT: ld.global.nc.v2.b8 {%rs1, %rs2}, [%rd2];
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
; CHECK-NEXT: cvt.s32.s8 %r2, %r1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs1;
; CHECK-NEXT: cvt.s32.s8 %r4, %r3;
; CHECK-NEXT: st.global.v2.b32 [%rd4], {%r4, %r2};
; CHECK-NEXT: ret;
entry:
  %bytes = load <2 x i8>, ptr %data
  %wide = sext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, ptr %res
  ret void
}