| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s |
| |
; NOTE(review): this triple names nvptx64, while the RUN line passes
; -march=nvptx and the CHECK lines expect 32-bit (ld.param.u32) pointer
; parameters. Confirm the mismatch is intentional.
| target triple = "nvptx64-unknown-unknown" |
| |
; kernel_func: loads a <32 x i8> vector from %in.vec, extracts elements
; 0, 8, 16 and 24 with a shufflevector whose second operand is undef, and
; stores the resulting <4 x i8> to %out.vec0.
;
; The expected PTX below performs four 32-bit loads at byte offsets 0, 8, 16
; and 24, combines them with three prmt.b32 instructions into one 32-bit
; register, and writes it with a single st.u32.
;
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.
| define void @kernel_func(ptr %in.vec, ptr %out.vec0) nounwind { |
| ; CHECK-LABEL: kernel_func( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<10>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.u32 %r1, [kernel_func_param_0]; |
| ; CHECK-NEXT: ld.u32 %r2, [%r1+8]; |
| ; CHECK-NEXT: ld.u32 %r3, [%r1]; |
| ; CHECK-NEXT: ld.u32 %r4, [%r1+24]; |
| ; CHECK-NEXT: ld.u32 %r5, [%r1+16]; |
| ; CHECK-NEXT: ld.param.u32 %r6, [kernel_func_param_1]; |
| ; CHECK-NEXT: prmt.b32 %r7, %r5, %r4, 0x4000U; |
| ; CHECK-NEXT: prmt.b32 %r8, %r3, %r2, 0x40U; |
| ; CHECK-NEXT: prmt.b32 %r9, %r8, %r7, 0x7610U; |
| ; CHECK-NEXT: st.u32 [%r6], %r9; |
| ; CHECK-NEXT: ret; |
; All four mask indices (0, 8, 16, 24) are below 32, so every selected
; element comes from %wide.vec and none from the undef operand.
| %wide.vec = load <32 x i8>, ptr %in.vec, align 64 |
| %vec0 = shufflevector <32 x i8> %wide.vec, <32 x i8> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24> |
| store <4 x i8> %vec0, ptr %out.vec0, align 64 |
| ret void |
| } |