| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; Verifies correctness of load/store of parameters and return values. |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %} |
| |
| %s_i8i16p = type { <{ i16, i8, i16 }>, i64 } |
| %s_i8i32p = type { <{ i32, i8, i32 }>, i64 } |
| %s_i8i64p = type { <{ i64, i8, i64 }>, i64 } |
| %s_i8f16p = type { <{ half, i8, half }>, i64 } |
| %s_i8f16x2p = type { <{ <2 x half>, i8, <2 x half> }>, i64 } |
| %s_i8f32p = type { <{ float, i8, float }>, i64 } |
| %s_i8f64p = type { <{ double, i8, double }>, i64 } |
| |
| ; -- All loads/stores from parameters aligned by one must be done one |
| ; byte at a time. |
| ; -- Notes: |
| ; -- There are two fields of interest in the packed part of the struct, one |
| ; with a proper offset and one without. The former should be loaded or |
| ; stored as a whole, and the latter by bytes. |
| ; -- Only the loading and storing of the said fields is checked in the |
| ; following series of tests, so that the tests stay concise. |
| |
| |
; test_s_i8i16p: takes a %s_i8i16p by value, passes it to a call of itself and
; returns the call's result, so the expected PTX below contains parameter
; loads, outgoing-argument stores, return-value loads and return-value stores.
; Per the CHECK lines: the i16 at offset 0 is accessed with one b16 access,
; the i8 at offset 2 with one b8 access, and the i16 that follows it in the
; packed part (offset 3) is accessed as two separate bytes at offsets 3 and 4.
; The trailing i64 is accessed as a single b64 at offset 8.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
| define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) { |
| ; CHECK-LABEL: test_s_i8i16p( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<15>; |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8i16p_param_0+4]; |
| ; CHECK-NEXT: shl.b16 %rs5, %rs4, 8; |
| ; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8i16p_param_0+3]; |
| ; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8]; |
| ; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8i16p_param_0+2]; |
| ; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i16p_param_0]; |
| ; CHECK-NEXT: { // callseq 0, 0 |
| ; CHECK-NEXT: .param .align 8 .b8 param0[16]; |
| ; CHECK-NEXT: st.param.b16 [param0], %rs1; |
| ; CHECK-NEXT: st.param.b8 [param0+2], %rs2; |
| ; CHECK-NEXT: st.param.b8 [param0+3], %rs3; |
| ; CHECK-NEXT: st.param.b8 [param0+4], %rs4; |
| ; CHECK-NEXT: st.param.b64 [param0+8], %rd1; |
| ; CHECK-NEXT: .param .align 8 .b8 retval0[16]; |
| ; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0); |
| ; CHECK-NEXT: ld.param.b16 %rs7, [retval0]; |
| ; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2]; |
| ; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3]; |
| ; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8]; |
| ; CHECK-NEXT: } // callseq 0 |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %rs7; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9; |
| ; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2; |
| ; CHECK-NEXT: ret; |
; Forward %a to a self-call and return the call's result unchanged.
| %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) |
| ret %s_i8i16p %r |
| } |
| |
| |
; test_s_i8i32p: takes a %s_i8i32p by value, passes it to a call of itself and
; returns the call's result, so the expected PTX below contains parameter
; loads, outgoing-argument stores, return-value loads and return-value stores.
; Per the CHECK lines: the i32 at offset 0 is accessed with one b32 access,
; the i8 at offset 4 with one b8 access, and the i32 that follows it in the
; packed part (offset 5) is accessed as four separate bytes at offsets 5..8.
; The trailing i64 is accessed as a single b64 at offset 16.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
| define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) { |
| ; CHECK-LABEL: test_s_i8i32p( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<12>; |
| ; CHECK-NEXT: .reg .b32 %r<20>; |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+6]; |
| ; CHECK-NEXT: shl.b32 %r4, %r3, 8; |
| ; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8i32p_param_0+5]; |
| ; CHECK-NEXT: or.b32 %r6, %r4, %r5; |
| ; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8i32p_param_0+7]; |
| ; CHECK-NEXT: shl.b32 %r8, %r7, 16; |
| ; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8i32p_param_0+8]; |
| ; CHECK-NEXT: shl.b32 %r10, %r9, 24; |
| ; CHECK-NEXT: or.b32 %r11, %r10, %r8; |
| ; CHECK-NEXT: or.b32 %r2, %r11, %r6; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16]; |
| ; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i32p_param_0+4]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0]; |
| ; CHECK-NEXT: shr.u32 %r12, %r2, 8; |
| ; CHECK-NEXT: shr.u32 %r13, %r11, 16; |
| ; CHECK-NEXT: { // callseq 1, 0 |
| ; CHECK-NEXT: .param .align 8 .b8 param0[24]; |
| ; CHECK-NEXT: st.param.b32 [param0], %r1; |
| ; CHECK-NEXT: st.param.b8 [param0+4], %rs1; |
| ; CHECK-NEXT: st.param.b8 [param0+5], %r2; |
| ; CHECK-NEXT: st.param.b8 [param0+6], %r12; |
| ; CHECK-NEXT: st.param.b8 [param0+7], %r13; |
| ; CHECK-NEXT: st.param.b8 [param0+8], %r9; |
| ; CHECK-NEXT: st.param.b64 [param0+16], %rd1; |
| ; CHECK-NEXT: .param .align 8 .b8 retval0[24]; |
| ; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0); |
| ; CHECK-NEXT: ld.param.b32 %r14, [retval0]; |
| ; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; |
| ; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; |
| ; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; |
| ; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; |
| ; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; |
| ; CHECK-NEXT: } // callseq 1 |
| ; CHECK-NEXT: cvt.u32.u16 %r15, %rs3; |
| ; CHECK-NEXT: cvt.u32.u16 %r16, %rs4; |
| ; CHECK-NEXT: cvt.u32.u16 %r17, %rs5; |
| ; CHECK-NEXT: cvt.u32.u16 %r18, %rs6; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r14; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15; |
| ; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; |
| ; CHECK-NEXT: ret; |
; Forward %a to a self-call and return the call's result unchanged.
| %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) |
| ret %s_i8i32p %r |
| } |
| |
| |
; test_s_i8i64p: takes a %s_i8i64p by value, passes it to a call of itself and
; returns the call's result, so the expected PTX below contains parameter
; loads, outgoing-argument stores, return-value loads and return-value stores.
; Per the CHECK lines: the i64 at offset 0 is accessed with one b64 access,
; the i8 at offset 8 with one b8 access, and the i64 that follows it in the
; packed part (offset 9) is accessed as eight separate bytes at offsets 9..16.
; The trailing i64 is accessed as a single b64 at offset 24.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
| define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) { |
| ; CHECK-LABEL: test_s_i8i64p( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<20>; |
| ; CHECK-NEXT: .reg .b64 %rd<68>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+10]; |
| ; CHECK-NEXT: shl.b64 %rd5, %rd4, 8; |
| ; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8i64p_param_0+9]; |
| ; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6; |
| ; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8i64p_param_0+11]; |
| ; CHECK-NEXT: shl.b64 %rd9, %rd8, 16; |
| ; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8i64p_param_0+12]; |
| ; CHECK-NEXT: shl.b64 %rd11, %rd10, 24; |
| ; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9; |
| ; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7; |
| ; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8i64p_param_0+14]; |
| ; CHECK-NEXT: shl.b64 %rd15, %rd14, 8; |
| ; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8i64p_param_0+13]; |
| ; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16; |
| ; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8i64p_param_0+15]; |
| ; CHECK-NEXT: shl.b64 %rd19, %rd18, 16; |
| ; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8i64p_param_0+16]; |
| ; CHECK-NEXT: shl.b64 %rd21, %rd20, 24; |
| ; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19; |
| ; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17; |
| ; CHECK-NEXT: shl.b64 %rd24, %rd23, 32; |
| ; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24]; |
| ; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i64p_param_0+8]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0]; |
| ; CHECK-NEXT: shr.u64 %rd25, %rd2, 8; |
| ; CHECK-NEXT: shr.u64 %rd26, %rd2, 16; |
| ; CHECK-NEXT: shr.u64 %rd27, %rd2, 24; |
| ; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24; |
| ; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16; |
| ; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8; |
| ; CHECK-NEXT: { // callseq 2, 0 |
| ; CHECK-NEXT: .param .align 8 .b8 param0[32]; |
| ; CHECK-NEXT: st.param.b64 [param0], %rd1; |
| ; CHECK-NEXT: st.param.b8 [param0+8], %rs1; |
| ; CHECK-NEXT: st.param.b8 [param0+9], %rd2; |
| ; CHECK-NEXT: st.param.b8 [param0+10], %rd25; |
| ; CHECK-NEXT: st.param.b8 [param0+11], %rd26; |
| ; CHECK-NEXT: st.param.b8 [param0+12], %rd27; |
| ; CHECK-NEXT: st.param.b8 [param0+13], %rd23; |
| ; CHECK-NEXT: st.param.b8 [param0+14], %rd28; |
| ; CHECK-NEXT: st.param.b8 [param0+15], %rd29; |
| ; CHECK-NEXT: st.param.b8 [param0+16], %rd30; |
| ; CHECK-NEXT: st.param.b64 [param0+24], %rd3; |
| ; CHECK-NEXT: .param .align 8 .b8 retval0[32]; |
| ; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0); |
| ; CHECK-NEXT: ld.param.b64 %rd31, [retval0]; |
| ; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8]; |
| ; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9]; |
| ; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10]; |
| ; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11]; |
| ; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12]; |
| ; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13]; |
| ; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14]; |
| ; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15]; |
| ; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16]; |
| ; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24]; |
| ; CHECK-NEXT: } // callseq 2 |
| ; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3; |
| ; CHECK-NEXT: and.b64 %rd34, %rd33, 255; |
| ; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4; |
| ; CHECK-NEXT: and.b64 %rd36, %rd35, 255; |
| ; CHECK-NEXT: shl.b64 %rd37, %rd36, 8; |
| ; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37; |
| ; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5; |
| ; CHECK-NEXT: and.b64 %rd40, %rd39, 255; |
| ; CHECK-NEXT: shl.b64 %rd41, %rd40, 16; |
| ; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41; |
| ; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6; |
| ; CHECK-NEXT: and.b64 %rd44, %rd43, 255; |
| ; CHECK-NEXT: shl.b64 %rd45, %rd44, 24; |
| ; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45; |
| ; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7; |
| ; CHECK-NEXT: and.b64 %rd48, %rd47, 255; |
| ; CHECK-NEXT: shl.b64 %rd49, %rd48, 32; |
| ; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49; |
| ; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8; |
| ; CHECK-NEXT: and.b64 %rd52, %rd51, 255; |
| ; CHECK-NEXT: shl.b64 %rd53, %rd52, 40; |
| ; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53; |
| ; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9; |
| ; CHECK-NEXT: and.b64 %rd56, %rd55, 255; |
| ; CHECK-NEXT: shl.b64 %rd57, %rd56, 48; |
| ; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57; |
| ; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10; |
| ; CHECK-NEXT: shl.b64 %rd60, %rd59, 56; |
| ; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd31; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33; |
| ; CHECK-NEXT: shr.u64 %rd64, %rd50, 32; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64; |
| ; CHECK-NEXT: shr.u64 %rd65, %rd54, 40; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65; |
| ; CHECK-NEXT: shr.u64 %rd66, %rd58, 48; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66; |
| ; CHECK-NEXT: shr.u64 %rd67, %rd61, 56; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67; |
| ; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32; |
| ; CHECK-NEXT: ret; |
; Forward %a to a self-call and return the call's result unchanged.
| %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) |
| ret %s_i8i64p %r |
| } |
| |
| |
; test_s_i8f16p: takes a %s_i8f16p by value, passes it to a call of itself and
; returns the call's result, so the expected PTX below contains parameter
; loads, outgoing-argument stores, return-value loads and return-value stores.
; Per the CHECK lines: the half at offset 0 is accessed with one b16 access,
; the i8 at offset 2 with one b8 access, and the half that follows it in the
; packed part (offset 3) is accessed as two separate bytes at offsets 3 and 4.
; The trailing i64 is accessed as a single b64 at offset 8.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
| define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) { |
| ; CHECK-LABEL: test_s_i8f16p( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<15>; |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8f16p_param_0+4]; |
| ; CHECK-NEXT: shl.b16 %rs5, %rs4, 8; |
| ; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8f16p_param_0+3]; |
| ; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8]; |
| ; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8f16p_param_0+2]; |
| ; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0]; |
| ; CHECK-NEXT: { // callseq 3, 0 |
| ; CHECK-NEXT: .param .align 8 .b8 param0[16]; |
| ; CHECK-NEXT: st.param.b16 [param0], %rs1; |
| ; CHECK-NEXT: st.param.b8 [param0+2], %rs2; |
| ; CHECK-NEXT: st.param.b8 [param0+3], %rs3; |
| ; CHECK-NEXT: st.param.b8 [param0+4], %rs4; |
| ; CHECK-NEXT: st.param.b64 [param0+8], %rd1; |
| ; CHECK-NEXT: .param .align 8 .b8 retval0[16]; |
| ; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0); |
| ; CHECK-NEXT: ld.param.b16 %rs7, [retval0]; |
| ; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2]; |
| ; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3]; |
| ; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8]; |
| ; CHECK-NEXT: } // callseq 3 |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %rs7; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9; |
| ; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2; |
| ; CHECK-NEXT: ret; |
; Forward %a to a self-call and return the call's result unchanged.
| %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) |
| ret %s_i8f16p %r |
| } |
| |
| |
; test_s_i8f16x2p: takes a %s_i8f16x2p by value, passes it to a call of itself
; and returns the call's result, so the expected PTX below contains parameter
; loads, outgoing-argument stores, return-value loads and return-value stores.
; Per the CHECK lines: the <2 x half> at offset 0 is accessed with one b32
; access, the i8 at offset 4 with one b8 access, and the <2 x half> that
; follows it in the packed part (offset 5) is accessed as four separate bytes
; at offsets 5..8. The trailing i64 is accessed as a single b64 at offset 16.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
| define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) { |
| ; CHECK-LABEL: test_s_i8f16x2p( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<12>; |
| ; CHECK-NEXT: .reg .b32 %r<20>; |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6]; |
| ; CHECK-NEXT: shl.b32 %r4, %r3, 8; |
| ; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5]; |
| ; CHECK-NEXT: or.b32 %r6, %r4, %r5; |
| ; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7]; |
| ; CHECK-NEXT: shl.b32 %r8, %r7, 16; |
| ; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8]; |
| ; CHECK-NEXT: shl.b32 %r10, %r9, 24; |
| ; CHECK-NEXT: or.b32 %r11, %r10, %r8; |
| ; CHECK-NEXT: or.b32 %r2, %r11, %r6; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16]; |
| ; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0]; |
| ; CHECK-NEXT: shr.u32 %r12, %r2, 8; |
| ; CHECK-NEXT: shr.u32 %r13, %r11, 16; |
| ; CHECK-NEXT: { // callseq 4, 0 |
| ; CHECK-NEXT: .param .align 8 .b8 param0[24]; |
| ; CHECK-NEXT: st.param.b32 [param0], %r1; |
| ; CHECK-NEXT: st.param.b8 [param0+4], %rs1; |
| ; CHECK-NEXT: st.param.b8 [param0+5], %r2; |
| ; CHECK-NEXT: st.param.b8 [param0+6], %r12; |
| ; CHECK-NEXT: st.param.b8 [param0+7], %r13; |
| ; CHECK-NEXT: st.param.b8 [param0+8], %r9; |
| ; CHECK-NEXT: st.param.b64 [param0+16], %rd1; |
| ; CHECK-NEXT: .param .align 8 .b8 retval0[24]; |
| ; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0); |
| ; CHECK-NEXT: ld.param.b32 %r14, [retval0]; |
| ; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; |
| ; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; |
| ; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; |
| ; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; |
| ; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; |
| ; CHECK-NEXT: } // callseq 4 |
| ; CHECK-NEXT: cvt.u32.u16 %r15, %rs3; |
| ; CHECK-NEXT: cvt.u32.u16 %r16, %rs4; |
| ; CHECK-NEXT: cvt.u32.u16 %r17, %rs5; |
| ; CHECK-NEXT: cvt.u32.u16 %r18, %rs6; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r14; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15; |
| ; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; |
| ; CHECK-NEXT: ret; |
; Forward %a to a self-call and return the call's result unchanged.
| %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) |
| ret %s_i8f16x2p %r |
| } |
| |
| |
; test_s_i8f32p: takes a %s_i8f32p by value, passes it to a call of itself and
; returns the call's result, so the expected PTX below contains parameter
; loads, outgoing-argument stores, return-value loads and return-value stores.
; Per the CHECK lines: the float at offset 0 is accessed with one b32 access,
; the i8 at offset 4 with one b8 access, and the float that follows it in the
; packed part (offset 5) is accessed as four separate bytes at offsets 5..8.
; The trailing i64 is accessed as a single b64 at offset 16.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
| define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) { |
| ; CHECK-LABEL: test_s_i8f32p( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<12>; |
| ; CHECK-NEXT: .reg .b32 %r<20>; |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+6]; |
| ; CHECK-NEXT: shl.b32 %r4, %r3, 8; |
| ; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f32p_param_0+5]; |
| ; CHECK-NEXT: or.b32 %r6, %r4, %r5; |
| ; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f32p_param_0+7]; |
| ; CHECK-NEXT: shl.b32 %r8, %r7, 16; |
| ; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f32p_param_0+8]; |
| ; CHECK-NEXT: shl.b32 %r10, %r9, 24; |
| ; CHECK-NEXT: or.b32 %r11, %r10, %r8; |
| ; CHECK-NEXT: or.b32 %r2, %r11, %r6; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16]; |
| ; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f32p_param_0+4]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0]; |
| ; CHECK-NEXT: shr.u32 %r12, %r2, 8; |
| ; CHECK-NEXT: shr.u32 %r13, %r11, 16; |
| ; CHECK-NEXT: { // callseq 5, 0 |
| ; CHECK-NEXT: .param .align 8 .b8 param0[24]; |
| ; CHECK-NEXT: st.param.b32 [param0], %r1; |
| ; CHECK-NEXT: st.param.b8 [param0+4], %rs1; |
| ; CHECK-NEXT: st.param.b8 [param0+5], %r2; |
| ; CHECK-NEXT: st.param.b8 [param0+6], %r12; |
| ; CHECK-NEXT: st.param.b8 [param0+7], %r13; |
| ; CHECK-NEXT: st.param.b8 [param0+8], %r9; |
| ; CHECK-NEXT: st.param.b64 [param0+16], %rd1; |
| ; CHECK-NEXT: .param .align 8 .b8 retval0[24]; |
| ; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0); |
| ; CHECK-NEXT: ld.param.b32 %r14, [retval0]; |
| ; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; |
| ; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; |
| ; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; |
| ; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; |
| ; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; |
| ; CHECK-NEXT: } // callseq 5 |
| ; CHECK-NEXT: cvt.u32.u16 %r15, %rs3; |
| ; CHECK-NEXT: cvt.u32.u16 %r16, %rs4; |
| ; CHECK-NEXT: cvt.u32.u16 %r17, %rs5; |
| ; CHECK-NEXT: cvt.u32.u16 %r18, %rs6; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r14; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15; |
| ; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; |
| ; CHECK-NEXT: ret; |
; Forward %a to a self-call and return the call's result unchanged.
| %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) |
| ret %s_i8f32p %r |
| } |
| |
| |
; test_s_i8f64p: takes a %s_i8f64p by value, passes it to a call of itself and
; returns the call's result, so the expected PTX below contains parameter
; loads, outgoing-argument stores, return-value loads and return-value stores.
; Per the CHECK lines: the double at offset 0 is accessed with one b64 access,
; the i8 at offset 8 with one b8 access, and the double that follows it in the
; packed part (offset 9) is accessed as eight separate bytes at offsets 9..16.
; The trailing i64 is accessed as a single b64 at offset 24.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
| define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) { |
| ; CHECK-LABEL: test_s_i8f64p( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<20>; |
| ; CHECK-NEXT: .reg .b64 %rd<68>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+10]; |
| ; CHECK-NEXT: shl.b64 %rd5, %rd4, 8; |
| ; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8f64p_param_0+9]; |
| ; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6; |
| ; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8f64p_param_0+11]; |
| ; CHECK-NEXT: shl.b64 %rd9, %rd8, 16; |
| ; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8f64p_param_0+12]; |
| ; CHECK-NEXT: shl.b64 %rd11, %rd10, 24; |
| ; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9; |
| ; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7; |
| ; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8f64p_param_0+14]; |
| ; CHECK-NEXT: shl.b64 %rd15, %rd14, 8; |
| ; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8f64p_param_0+13]; |
| ; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16; |
| ; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8f64p_param_0+15]; |
| ; CHECK-NEXT: shl.b64 %rd19, %rd18, 16; |
| ; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8f64p_param_0+16]; |
| ; CHECK-NEXT: shl.b64 %rd21, %rd20, 24; |
| ; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19; |
| ; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17; |
| ; CHECK-NEXT: shl.b64 %rd24, %rd23, 32; |
| ; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13; |
| ; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24]; |
| ; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f64p_param_0+8]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0]; |
| ; CHECK-NEXT: shr.u64 %rd25, %rd2, 8; |
| ; CHECK-NEXT: shr.u64 %rd26, %rd2, 16; |
| ; CHECK-NEXT: shr.u64 %rd27, %rd2, 24; |
| ; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24; |
| ; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16; |
| ; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8; |
| ; CHECK-NEXT: { // callseq 6, 0 |
| ; CHECK-NEXT: .param .align 8 .b8 param0[32]; |
| ; CHECK-NEXT: st.param.b64 [param0], %rd1; |
| ; CHECK-NEXT: st.param.b8 [param0+8], %rs1; |
| ; CHECK-NEXT: st.param.b8 [param0+9], %rd2; |
| ; CHECK-NEXT: st.param.b8 [param0+10], %rd25; |
| ; CHECK-NEXT: st.param.b8 [param0+11], %rd26; |
| ; CHECK-NEXT: st.param.b8 [param0+12], %rd27; |
| ; CHECK-NEXT: st.param.b8 [param0+13], %rd23; |
| ; CHECK-NEXT: st.param.b8 [param0+14], %rd28; |
| ; CHECK-NEXT: st.param.b8 [param0+15], %rd29; |
| ; CHECK-NEXT: st.param.b8 [param0+16], %rd30; |
| ; CHECK-NEXT: st.param.b64 [param0+24], %rd3; |
| ; CHECK-NEXT: .param .align 8 .b8 retval0[32]; |
| ; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0); |
| ; CHECK-NEXT: ld.param.b64 %rd31, [retval0]; |
| ; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8]; |
| ; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9]; |
| ; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10]; |
| ; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11]; |
| ; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12]; |
| ; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13]; |
| ; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14]; |
| ; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15]; |
| ; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16]; |
| ; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24]; |
| ; CHECK-NEXT: } // callseq 6 |
| ; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3; |
| ; CHECK-NEXT: and.b64 %rd34, %rd33, 255; |
| ; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4; |
| ; CHECK-NEXT: and.b64 %rd36, %rd35, 255; |
| ; CHECK-NEXT: shl.b64 %rd37, %rd36, 8; |
| ; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37; |
| ; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5; |
| ; CHECK-NEXT: and.b64 %rd40, %rd39, 255; |
| ; CHECK-NEXT: shl.b64 %rd41, %rd40, 16; |
| ; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41; |
| ; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6; |
| ; CHECK-NEXT: and.b64 %rd44, %rd43, 255; |
| ; CHECK-NEXT: shl.b64 %rd45, %rd44, 24; |
| ; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45; |
| ; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7; |
| ; CHECK-NEXT: and.b64 %rd48, %rd47, 255; |
| ; CHECK-NEXT: shl.b64 %rd49, %rd48, 32; |
| ; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49; |
| ; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8; |
| ; CHECK-NEXT: and.b64 %rd52, %rd51, 255; |
| ; CHECK-NEXT: shl.b64 %rd53, %rd52, 40; |
| ; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53; |
| ; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9; |
| ; CHECK-NEXT: and.b64 %rd56, %rd55, 255; |
| ; CHECK-NEXT: shl.b64 %rd57, %rd56, 48; |
| ; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57; |
| ; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10; |
| ; CHECK-NEXT: shl.b64 %rd60, %rd59, 56; |
| ; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd31; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33; |
| ; CHECK-NEXT: shr.u64 %rd64, %rd50, 32; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64; |
| ; CHECK-NEXT: shr.u64 %rd65, %rd54, 40; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65; |
| ; CHECK-NEXT: shr.u64 %rd66, %rd58, 48; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66; |
| ; CHECK-NEXT: shr.u64 %rd67, %rd61, 56; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67; |
| ; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32; |
| ; CHECK-NEXT: ret; |
; Forward %a to a self-call and return the call's result unchanged.
| %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) |
| ret %s_i8f64p %r |
| } |