; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+m,+zbb \
; RUN: -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefixes=CHECK,RV32 %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+m,+zbb \
; RUN: -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefixes=CHECK,RV64 %s
; 32-bit vector (<4 x i8>) passed and returned by value.
; Both targets share one expectation: a single padd.b on a0 and a1 with the
; result in a0, i.e. each vector argument arrives in one GPR.
define <4 x i8> @test_cc_v4i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_cc_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: padd.b a0, a0, a1
; CHECK-NEXT: ret
%res = add <4 x i8> %a, %b
ret <4 x i8> %res
}
; 32-bit vector (<2 x i16>) passed and returned by value.
; Both targets share one expectation: a single padd.h on a0 and a1 with the
; result in a0, i.e. each vector argument arrives in one GPR.
define <2 x i16> @test_cc_v2i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: test_cc_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: padd.h a0, a0, a1
; CHECK-NEXT: ret
%res = add <2 x i16> %a, %b
ret <2 x i16> %res
}
; 64-bit vector (<8 x i8>) passed and returned by value.
; riscv32 expectation: the add is done as two padd.b, pairing a0 with a2 and
; a1 with a3, so %a occupies a0/a1, %b occupies a2/a3 and the result is
; returned in a0/a1.
; riscv64 expectation: one padd.b on a0 and a1, result in a0.
define <8 x i8> @test_cc_v8i8(<8 x i8> %a, <8 x i8> %b) {
; RV32-LABEL: test_cc_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: padd.b a0, a0, a2
; RV32-NEXT: padd.b a1, a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: test_cc_v8i8:
; RV64: # %bb.0:
; RV64-NEXT: padd.b a0, a0, a1
; RV64-NEXT: ret
%res = add <8 x i8> %a, %b
ret <8 x i8> %res
}
; 64-bit vector (<4 x i16>) passed and returned by value.
; riscv32 expectation: the add is done as two padd.h, pairing a0 with a2 and
; a1 with a3, so %a occupies a0/a1, %b occupies a2/a3 and the result is
; returned in a0/a1.
; riscv64 expectation: one padd.h on a0 and a1, result in a0.
define <4 x i16> @test_cc_v4i16(<4 x i16> %a, <4 x i16> %b) {
; RV32-LABEL: test_cc_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: padd.h a0, a0, a2
; RV32-NEXT: padd.h a1, a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: test_cc_v4i16:
; RV64: # %bb.0:
; RV64-NEXT: padd.h a0, a0, a1
; RV64-NEXT: ret
%res = add <4 x i16> %a, %b
ret <4 x i16> %res
}
; 64-bit vector (<2 x i32>) passed and returned by value.
; riscv32 expectation: each 32-bit element is handled by a plain scalar add
; (a0 with a2, a1 with a3); no packed instruction is expected here.
; riscv64 expectation: one padd.w on a0 and a1, result in a0.
define <2 x i32> @test_cc_v2i32(<2 x i32> %a, <2 x i32> %b) {
; RV32-LABEL: test_cc_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: test_cc_v2i32:
; RV64: # %bb.0:
; RV64-NEXT: padd.w a0, a0, a1
; RV64-NEXT: ret
%res = add <2 x i32> %a, %b
ret <2 x i32> %res
}
; Indirect on RV32, two registers on RV64
; 128-bit vector (<16 x i8>).
; riscv32 expectation: a1 and a2 are used as addresses of the two operands
; (four 32-bit loads from each), the sum is formed with four padd.b, and the
; 16-byte result is stored through the pointer in a0.
; riscv64 expectation: %a is in a0/a1, %b is in a2/a3, and two padd.b leave
; the result in a0/a1.
define <16 x i8> @test_cc_v16i8(<16 x i8> %a, <16 x i8> %b) {
; RV32-LABEL: test_cc_v16i8:
; RV32: # %bb.0:
; RV32-NEXT: lw a3, 0(a2)
; RV32-NEXT: lw a4, 4(a2)
; RV32-NEXT: lw a5, 8(a2)
; RV32-NEXT: lw a2, 12(a2)
; RV32-NEXT: lw a6, 0(a1)
; RV32-NEXT: lw a7, 4(a1)
; RV32-NEXT: lw t0, 8(a1)
; RV32-NEXT: lw a1, 12(a1)
; RV32-NEXT: padd.b a3, a6, a3
; RV32-NEXT: padd.b a4, a7, a4
; RV32-NEXT: padd.b a5, t0, a5
; RV32-NEXT: padd.b a1, a1, a2
; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: sw a4, 4(a0)
; RV32-NEXT: sw a5, 8(a0)
; RV32-NEXT: sw a1, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: test_cc_v16i8:
; RV64: # %bb.0:
; RV64-NEXT: padd.b a0, a0, a2
; RV64-NEXT: padd.b a1, a1, a3
; RV64-NEXT: ret
%res = add <16 x i8> %a, %b
ret <16 x i8> %res
}
; 128-bit vector (<8 x i16>).
; riscv32 expectation: a1 and a2 are used as addresses of the two operands
; (four 32-bit loads from each), the sum is formed with four padd.h, and the
; 16-byte result is stored through the pointer in a0.
; riscv64 expectation: %a is in a0/a1, %b is in a2/a3, and two padd.h leave
; the result in a0/a1.
define <8 x i16> @test_cc_v8i16(<8 x i16> %a, <8 x i16> %b) {
; RV32-LABEL: test_cc_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: lw a3, 0(a2)
; RV32-NEXT: lw a4, 4(a2)
; RV32-NEXT: lw a5, 8(a2)
; RV32-NEXT: lw a2, 12(a2)
; RV32-NEXT: lw a6, 0(a1)
; RV32-NEXT: lw a7, 4(a1)
; RV32-NEXT: lw t0, 8(a1)
; RV32-NEXT: lw a1, 12(a1)
; RV32-NEXT: padd.h a3, a6, a3
; RV32-NEXT: padd.h a4, a7, a4
; RV32-NEXT: padd.h a5, t0, a5
; RV32-NEXT: padd.h a1, a1, a2
; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: sw a4, 4(a0)
; RV32-NEXT: sw a5, 8(a0)
; RV32-NEXT: sw a1, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: test_cc_v8i16:
; RV64: # %bb.0:
; RV64-NEXT: padd.h a0, a0, a2
; RV64-NEXT: padd.h a1, a1, a3
; RV64-NEXT: ret
%res = add <8 x i16> %a, %b
ret <8 x i16> %res
}
; 128-bit vector (<4 x i32>).
; riscv32 expectation: a1 and a2 are used as addresses of the two operands
; (four 32-bit loads from each), each element is summed with a plain scalar
; add, and the 16-byte result is stored through the pointer in a0.
; riscv64 expectation: %a is in a0/a1, %b is in a2/a3, and two padd.w leave
; the result in a0/a1.
define <4 x i32> @test_cc_v4i32(<4 x i32> %a, <4 x i32> %b) {
; RV32-LABEL: test_cc_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: lw a3, 0(a2)
; RV32-NEXT: lw a4, 4(a2)
; RV32-NEXT: lw a5, 8(a2)
; RV32-NEXT: lw a2, 12(a2)
; RV32-NEXT: lw a6, 0(a1)
; RV32-NEXT: lw a7, 4(a1)
; RV32-NEXT: lw t0, 8(a1)
; RV32-NEXT: lw a1, 12(a1)
; RV32-NEXT: add a3, a6, a3
; RV32-NEXT: add a4, a7, a4
; RV32-NEXT: add a5, t0, a5
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: sw a4, 4(a0)
; RV32-NEXT: sw a5, 8(a0)
; RV32-NEXT: sw a1, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: test_cc_v4i32:
; RV64: # %bb.0:
; RV64-NEXT: padd.w a0, a0, a2
; RV64-NEXT: padd.w a1, a1, a3
; RV64-NEXT: ret
%res = add <4 x i32> %a, %b
ret <4 x i32> %res
}
; Function call tests
; External callee taking and returning 32-bit vectors; only declared here.
declare <4 x i8> @external_v4i8(<4 x i8>, <4 x i8>)
; Calls external_v4i8 with the two incoming arguments in swapped order
; (%b first, then %a) and returns the callee's result unchanged.
; Expected code on both targets: a0 and a1 are exchanged through a2 before
; the call, and ra is spilled to and reloaded from a 16-byte stack frame
; around it. No instruction touches a0 after the call returns.
define <4 x i8> @test_call_v4i8(<4 x i8> %a, <4 x i8> %b) {
; RV32-LABEL: test_call_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a0, a1
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call external_v4i8
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_call_v4i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a0, a1
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call external_v4i8
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%res = call <4 x i8> @external_v4i8(<4 x i8> %b, <4 x i8> %a)
ret <4 x i8> %res
}
; Test calling a function with v8i8 arguments (split on RV32)
; The call passes the incoming arguments in swapped order (%b first, then %a)
; and returns the callee's result unchanged.
; riscv32 expectation: each 64-bit vector occupies a register pair, so the
; swap exchanges a0/a1 with a2/a3. %a is parked in a5/a4, then
; "padd.dw a0, a2, zero" is emitted, then a5/a4 are moved into a2/a3.
; NOTE(review): the padd.dw with a zero operand presumably serves as a
; register-pair copy of a2/a3 into a0/a1 -- confirm against the P-extension
; instruction definition.
; riscv64 expectation: each vector fits one GPR; a0 and a1 are exchanged
; through a2.
declare <8 x i8> @external_v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_call_v8i8(<8 x i8> %a, <8 x i8> %b) {
; RV32-LABEL: test_call_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a4, a1
; RV32-NEXT: mv a5, a0
; RV32-NEXT: padd.dw a0, a2, zero
; RV32-NEXT: mv a2, a5
; RV32-NEXT: mv a3, a4
; RV32-NEXT: call external_v8i8
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_call_v8i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a0, a1
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call external_v8i8
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%res = call <8 x i8> @external_v8i8(<8 x i8> %b, <8 x i8> %a)
ret <8 x i8> %res
}
; Test calling a function with v16i8 arguments (passed by reference on RV32)
; The call passes the incoming arguments in swapped order (%b first, then %a)
; and returns the callee's result unchanged.
; riscv32 expectation (64-byte frame, ra and s0 saved):
;   - the incoming result pointer in a0 is kept in s0 across the call;
;   - the 16 bytes behind a1 (%a) are copied to sp+0 and the 16 bytes behind
;     a2 (%b) are copied to sp+16;
;   - the callee receives a0 = sp+32 (result slot), a1 = sp+16 (%b) and
;     a2 = sp (%a);
;   - after the call, the 16 bytes at sp+32 are copied out through s0.
; riscv64 expectation: each vector occupies a register pair; a0/a1 and a2/a3
; are exchanged through a5/a4 before the call.
declare <16 x i8> @external_v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_call_v16i8(<16 x i8> %a, <16 x i8> %b) {
; RV32-LABEL: test_call_v16i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -64
; RV32-NEXT: .cfi_def_cfa_offset 64
; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: lw a3, 0(a2)
; RV32-NEXT: lw a4, 4(a2)
; RV32-NEXT: lw a5, 8(a2)
; RV32-NEXT: lw a6, 12(a2)
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a7, 4(a1)
; RV32-NEXT: lw t0, 8(a1)
; RV32-NEXT: lw a1, 12(a1)
; RV32-NEXT: mv s0, a0
; RV32-NEXT: sw a2, 0(sp)
; RV32-NEXT: sw a7, 4(sp)
; RV32-NEXT: sw t0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 32
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: mv a2, sp
; RV32-NEXT: sw a3, 16(sp)
; RV32-NEXT: sw a4, 20(sp)
; RV32-NEXT: sw a5, 24(sp)
; RV32-NEXT: sw a6, 28(sp)
; RV32-NEXT: call external_v16i8
; RV32-NEXT: lw a0, 32(sp)
; RV32-NEXT: lw a1, 36(sp)
; RV32-NEXT: lw a2, 40(sp)
; RV32-NEXT: lw a3, 44(sp)
; RV32-NEXT: sw a0, 0(s0)
; RV32-NEXT: sw a1, 4(s0)
; RV32-NEXT: sw a2, 8(s0)
; RV32-NEXT: sw a3, 12(s0)
; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_call_v16i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a4, a1
; RV64-NEXT: mv a5, a0
; RV64-NEXT: mv a0, a2
; RV64-NEXT: mv a1, a3
; RV64-NEXT: mv a2, a5
; RV64-NEXT: mv a3, a4
; RV64-NEXT: call external_v16i8
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%res = call <16 x i8> @external_v16i8(<16 x i8> %b, <16 x i8> %a)
ret <16 x i8> %res
}
; Argument-register exhaustion with a 32-bit vector.
; Four i64 dummy arguments precede the <2 x i16> argument %b; the function
; returns %b + %b.
; riscv32 expectation: %b is loaded from 0(sp), i.e. the dummies leave no
; argument register for it and it is passed on the stack.
; riscv64 expectation: %b is read directly from a4.
define <2 x i16> @test_exhaust(i64 %dummy, i64 %dummy2, i64 %dummy3, i64 %dummy4, <2 x i16> %b) {
; RV32-LABEL: test_exhaust:
; RV32: # %bb.0:
; RV32-NEXT: lw a0, 0(sp)
; RV32-NEXT: padd.h a0, a0, a0
; RV32-NEXT: ret
;
; RV64-LABEL: test_exhaust:
; RV64: # %bb.0:
; RV64-NEXT: padd.h a0, a4, a4
; RV64-NEXT: ret
%res = add <2 x i16> %b, %b
ret <2 x i16> %res
}
; Argument-register exhaustion with a 64-bit vector where only one argument
; register remains on riscv32.
; Three i64 dummies and one i32 dummy precede the <4 x i16> argument %b; the
; function returns %b + %b.
; riscv32 expectation: %b is split between the last register and the stack;
; one half is read from a7 and the other is loaded from 0(sp).
; riscv64 expectation: %b is read whole from a4.
define <4 x i16> @test_exhaust_2xlen_rv32(i64 %dummy, i64 %dummy2, i64 %dummy3, i32 %dummy4, <4 x i16> %b) {
; RV32-LABEL: test_exhaust_2xlen_rv32:
; RV32: # %bb.0:
; RV32-NEXT: lw a1, 0(sp)
; RV32-NEXT: padd.h a0, a7, a7
; RV32-NEXT: padd.h a1, a1, a1
; RV32-NEXT: ret
;
; RV64-LABEL: test_exhaust_2xlen_rv32:
; RV64: # %bb.0:
; RV64-NEXT: padd.h a0, a4, a4
; RV64-NEXT: ret
%res = add <4 x i16> %b, %b
ret <4 x i16> %res
}
; Argument-register exhaustion with a 64-bit vector where no argument
; register remains on riscv32.
; Four i64 dummies precede the <4 x i16> argument %b; the function returns
; %b + %b.
; riscv32 expectation: both halves of %b are loaded from the stack, at 0(sp)
; and 4(sp).
; riscv64 expectation: %b is read whole from a4.
define <4 x i16> @test_exhaust_2xlen_rv32_2(i64 %dummy, i64 %dummy2, i64 %dummy3, i64 %dummy4, <4 x i16> %b) {
; RV32-LABEL: test_exhaust_2xlen_rv32_2:
; RV32: # %bb.0:
; RV32-NEXT: lw a0, 0(sp)
; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: padd.h a0, a0, a0
; RV32-NEXT: padd.h a1, a1, a1
; RV32-NEXT: ret
;
; RV64-LABEL: test_exhaust_2xlen_rv32_2:
; RV64: # %bb.0:
; RV64-NEXT: padd.h a0, a4, a4
; RV64-NEXT: ret
%res = add <4 x i16> %b, %b
ret <4 x i16> %res
}