; blob: 94554103050f4f1fb1f3fade358c10e1ed105ef5 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s -check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s -check-prefixes=RV64
;; Testcases extracted from embench-iot picojpeg - relating to PR190868
;; We should be able to delete the `slli` instructions, as they are not necessary.
;; snippet1_i8: loads two bytes' worth of data from %a, computes
;;   v = zext(a[2]) - (((load16(a) & 255) * 183 >> 8) - 91)
;; in i16, clamps v to the unsigned 8-bit range [0, 255] and stores the
;; resulting byte to %ret.
;; The CHECK lines below are autogenerated and record the current llc output;
;; in %bb.1 the slli/srli pair isolates bit 15 (the i16 sign bit) of v.
;; NOTE(review): v is built from zero-extended bytes in full-width registers,
;; so the register presumably already holds v's sign in its top bit, which
;; would make the slli redundant - confirm against the referenced PR.
define void @snippet1_i8(ptr %ret, ptr %a) nounwind {
; RV32-LABEL: snippet1_i8:
; RV32: # %bb.0:
; RV32-NEXT: lbu a2, 0(a1)
; RV32-NEXT: lbu a1, 2(a1)
; RV32-NEXT: li a3, 183
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: srli a2, a2, 8
; RV32-NEXT: sub a1, a1, a2
; RV32-NEXT: addi a1, a1, 91
; RV32-NEXT: li a2, 255
; RV32-NEXT: bgeu a2, a1, .LBB0_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: slli a1, a1, 16
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: .LBB0_2:
; RV32-NEXT: sb a1, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: snippet1_i8:
; RV64: # %bb.0:
; RV64-NEXT: lbu a2, 0(a1)
; RV64-NEXT: lbu a1, 2(a1)
; RV64-NEXT: li a3, 183
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: srli a2, a2, 8
; RV64-NEXT: sub a1, a1, a2
; RV64-NEXT: addi a1, a1, 91
; RV64-NEXT: li a2, 255
; RV64-NEXT: bgeu a2, a1, .LBB0_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: slli a1, a1, 48
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: .LBB0_2:
; RV64-NEXT: sb a1, 0(a0)
; RV64-NEXT: ret
;; Inputs: a 16-bit load from %a (only its low 8 bits are used, via the
;; `and` below) and an 8-bit load from %a + 2.
%1 = getelementptr i8, ptr %a, i32 2
%2 = load i16, ptr %a
%3 = load i8, ptr %1
;; %7 = ((low byte * 183) >> 8) - 91
%4 = and i16 %2, 255
%5 = mul nuw i16 %4, 183
%6 = lshr i16 %5, 8
%7 = add nsw i16 %6, -91
;; %9 = zext(second byte) - %7; may be negative or exceed 255.
%8 = zext i8 %3 to i16
%9 = sub nsw i16 %8, %7
;; Clamp: when %9 is outside [0, 255] (unsigned compare also catches
;; negatives), pick all-ones (255) if %9 >= 0, else 0; otherwise keep the
;; truncated value.
%10 = icmp ugt i16 %9, 255
%11 = trunc nuw i16 %9 to i8
%12 = icmp sgt i16 %9, -1
%13 = sext i1 %12 to i8
%14 = select i1 %10, i8 %13, i8 %11
store i8 %14, ptr %ret
ret void
}
;; snippet2_i8: same clamp pattern as a register-argument variant. %x and %y
;; are zero-extended i8 arguments; the function computes
;;   t = ((zext(x) * 183) >> 8) - 91      (stored as i16 to %p)
;;   v = zext(y) - t
;; then clamps v to [0, 255] and stores the resulting byte to %q.
;; The CHECK lines below are autogenerated and record the current llc output;
;; in %bb.1 the slli/srli pair isolates bit 15 (the i16 sign bit) of v.
;; NOTE(review): v is built from zero-extended bytes in full-width registers,
;; so the register presumably already holds v's sign in its top bit, which
;; would make the slli redundant - confirm against the referenced PR.
define void @snippet2_i8(i8 zeroext %x, i8 zeroext %y, ptr %p, ptr %q) nounwind {
; RV32-LABEL: snippet2_i8:
; RV32: # %bb.0:
; RV32-NEXT: li a4, 183
; RV32-NEXT: mul a0, a0, a4
; RV32-NEXT: srli a0, a0, 8
; RV32-NEXT: addi a4, a0, -91
; RV32-NEXT: sub a0, a1, a4
; RV32-NEXT: li a1, 255
; RV32-NEXT: sh a4, 0(a2)
; RV32-NEXT: bgeu a1, a0, .LBB1_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 31
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: .LBB1_2:
; RV32-NEXT: sb a0, 0(a3)
; RV32-NEXT: ret
;
; RV64-LABEL: snippet2_i8:
; RV64: # %bb.0:
; RV64-NEXT: li a4, 183
; RV64-NEXT: mul a0, a0, a4
; RV64-NEXT: srli a0, a0, 8
; RV64-NEXT: addi a4, a0, -91
; RV64-NEXT: sub a0, a1, a4
; RV64-NEXT: li a1, 255
; RV64-NEXT: sh a4, 0(a2)
; RV64-NEXT: bgeu a1, a0, .LBB1_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 63
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: .LBB1_2:
; RV64-NEXT: sb a0, 0(a3)
; RV64-NEXT: ret
;; %44 = ((zext(x) * 183) >> 8) - 91; this intermediate is also stored to %p,
;; so it has a second use besides the subtraction below.
%20 = zext i8 %x to i16
%42 = mul nuw i16 %20, 183
%43 = lshr i16 %42, 8
%44 = add nsw i16 %43, -91
store i16 %44, ptr %p
;; %47 = zext(y) - %44; may be negative or exceed 255.
%46 = zext i8 %y to i16
%47 = sub nsw i16 %46, %44
;; Clamp: when %47 is outside [0, 255] (unsigned compare also catches
;; negatives), pick all-ones (255) if %47 >= 0, else 0; otherwise keep the
;; truncated value.
%48 = icmp ugt i16 %47, 255
%49 = trunc nuw i16 %47 to i8
%50 = icmp sgt i16 %47, -1
%51 = sext i1 %50 to i8
%52 = select i1 %48, i8 %51, i8 %49
store i8 %52, ptr %q
ret void
}