| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck %s |
| |
| ; This test has multiple opportunities for SimplifyDemandedBits after type |
| ; legalization. There are 2 opportunities on the chain feeding the LHS of the |
| ; shl. And one opportunity on the shift amount. We previously weren't managing |
| ; the DAGCombiner worklist correctly and failed to get the RHS. |
| ; The checks below expect the chain feeding the shift to use plain mul/add/addi |
| ; and expect a1 (%y) to reach sllw as the shift amount without any extra |
| ; masking or extension instruction. |
| define i32 @foo(i32 %x, i32 %y, i32 %z) { |
| ; CHECK-LABEL: foo: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: mul a0, a0, a0 |
| ; CHECK-NEXT: addi a0, a0, 1 |
| ; CHECK-NEXT: mul a0, a0, a0 |
| ; CHECK-NEXT: add a0, a0, a2 |
| ; CHECK-NEXT: addi a0, a0, 1 |
| ; CHECK-NEXT: sllw a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %b = mul i32 %x, %x |
| %c = add i32 %b, 1 |
| %d = mul i32 %c, %c |
| %e = add i32 %d, %z |
| %f = add i32 %e, 1 |
| %g = shl i32 %f, %y |
| ret i32 %g |
| } |
| |
| ; The sign bit of an nsw self multiply is 0. Make sure we can use this to |
| ; convert the AND constant to -8. |
| ; The IR mask 9223372036854775800 is 0x7ffffffffffffff8, i.e. -8 with only the |
| ; sign bit cleared; the checks expect a single andi with immediate -8 instead |
| ; of materializing the 64-bit constant. |
| define i64 @mul_self_nsw_sign(i64 %x) { |
| ; CHECK-LABEL: mul_self_nsw_sign: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: mul a0, a0, a0 |
| ; CHECK-NEXT: andi a0, a0, -8 |
| ; CHECK-NEXT: ret |
| %a = mul nsw i64 %x, %x |
| %b = and i64 %a, 9223372036854775800 |
| ret i64 %b |
| } |
| |
| ; Make sure we sign extend the constant after type legalization to allow the |
| ; use of ori. |
| ; The checks expect the i32 constant -2 to appear directly as the ori |
| ; immediate, with no separate instruction to materialize it. |
| define void @ori(ptr nocapture noundef %0) { |
| ; CHECK-LABEL: ori: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a0) |
| ; CHECK-NEXT: ori a1, a1, -2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %2 = load i32, ptr %0, align 4 |
| %3 = or i32 %2, -2 |
| store i32 %3, ptr %0, align 4 |
| ret void |
| } |
| |
| ; Make sure we sign extend the constant after type legalization to allow the |
| ; use of xori. |
| ; The checks expect the i32 constant -5 to appear directly as the xori |
| ; immediate, with no separate instruction to materialize it. |
| define void @xori(ptr nocapture noundef %0) { |
| ; CHECK-LABEL: xori: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a0) |
| ; CHECK-NEXT: xori a1, a1, -5 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %2 = load i32, ptr %0, align 4 |
| %3 = xor i32 %2, -5 |
| store i32 %3, ptr %0, align 4 |
| ret void |
| } |
| |
| ; Make sure we sign extend the constant after type legalization to allow the |
| ; shorter constant materialization. |
| ; The constant is the i32 sign bit (-2147483648, 0x80000000). The checks expect |
| ; it to be built with a single lui (524288 = 0x80000) and then combined with a |
| ; register-register or. |
| define void @or_signbit(ptr nocapture noundef %0) { |
| ; CHECK-LABEL: or_signbit: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a0) |
| ; CHECK-NEXT: lui a2, 524288 |
| ; CHECK-NEXT: or a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %2 = load i32, ptr %0, align 4 |
| %3 = or i32 %2, -2147483648 |
| store i32 %3, ptr %0, align 4 |
| ret void |
| } |
| |
| ; Make sure we sign extend the constant after type legalization to allow the |
| ; shorter constant materialization. |
| ; The constant is the i32 sign bit (-2147483648, 0x80000000). The checks expect |
| ; it to be built with a single lui (524288 = 0x80000) and then combined with a |
| ; register-register xor. |
| define void @xor_signbit(ptr nocapture noundef %0) { |
| ; CHECK-LABEL: xor_signbit: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a0) |
| ; CHECK-NEXT: lui a2, 524288 |
| ; CHECK-NEXT: xor a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %2 = load i32, ptr %0, align 4 |
| %3 = xor i32 %2, -2147483648 |
| store i32 %3, ptr %0, align 4 |
| ret void |
| } |
| |
| ; Type legalization inserts a sext_inreg after the sub. This causes the |
| ; constant for the AND to be turned into 0xfffffff8. Then SimplifyDemandedBits |
| ; removes the sext_inreg from the path to the store. This prevents |
| ; TargetShrinkDemandedConstant from being able to restore the lost upper bits |
| ; from the and mask to allow andi. ISel is able to recover the lost sext_inreg |
| ; using hasAllWUsers. We also use hasAllWUsers to recover the ANDI. |
| ; Note that the sub result %5 has two users in the IR: the i32 store and the |
| ; signext return value. The checks expect one andi with -8 and one subw whose |
| ; result feeds both the sw and the return register. |
| define signext i32 @andi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) { |
| ; CHECK-LABEL: andi_sub_cse: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andi a0, a0, -8 |
| ; CHECK-NEXT: subw a0, a0, a1 |
| ; CHECK-NEXT: sw a0, 0(a2) |
| ; CHECK-NEXT: ret |
| %4 = and i32 %0, -8 |
| %5 = sub i32 %4, %1 |
| store i32 %5, ptr %2, align 4 |
| ret i32 %5 |
| } |
| |
| ; Same IR shape as andi_sub_cse, with add in place of and: the sub result is |
| ; both stored as i32 and returned signext. The checks expect the -8 to be |
| ; applied after the subtraction, as a subw followed by addiw with -8, and the |
| ; single result to feed both the sw and the return register. |
| define signext i32 @addi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) { |
| ; CHECK-LABEL: addi_sub_cse: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: subw a0, a0, a1 |
| ; CHECK-NEXT: addiw a0, a0, -8 |
| ; CHECK-NEXT: sw a0, 0(a2) |
| ; CHECK-NEXT: ret |
| %4 = add i32 %0, -8 |
| %5 = sub i32 %4, %1 |
| store i32 %5, ptr %2, align 4 |
| ret i32 %5 |
| } |
| |
| ; Same IR shape as andi_sub_cse, with xor in place of and: the sub result is |
| ; both stored as i32 and returned signext. The checks expect the constant to |
| ; be selected as an xori immediate of -8, followed by a single subw whose |
| ; result feeds both the sw and the return register. |
| define signext i32 @xori_sub_cse(i32 signext %0, i32 signext %1, ptr %2) { |
| ; CHECK-LABEL: xori_sub_cse: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xori a0, a0, -8 |
| ; CHECK-NEXT: subw a0, a0, a1 |
| ; CHECK-NEXT: sw a0, 0(a2) |
| ; CHECK-NEXT: ret |
| %4 = xor i32 %0, -8 |
| %5 = sub i32 %4, %1 |
| store i32 %5, ptr %2, align 4 |
| ret i32 %5 |
| } |
| |
| ; Same IR shape as andi_sub_cse, with or in place of and: the sub result is |
| ; both stored as i32 and returned signext. The checks expect the constant to |
| ; be selected as an ori immediate of -8, followed by a single subw whose |
| ; result feeds both the sw and the return register. |
| define signext i32 @ori_sub_cse(i32 signext %0, i32 signext %1, ptr %2) { |
| ; CHECK-LABEL: ori_sub_cse: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ori a0, a0, -8 |
| ; CHECK-NEXT: subw a0, a0, a1 |
| ; CHECK-NEXT: sw a0, 0(a2) |
| ; CHECK-NEXT: ret |
| %4 = or i32 %0, -8 |
| %5 = sub i32 %4, %1 |
| store i32 %5, ptr %2, align 4 |
| ret i32 %5 |
| } |
| |
| ; SimplifyDemandedBits breaks the ANDI by turning -8 into 0xfffffff8. This |
| ; gets CSEd with the AND needed for type legalizing the lshr. This increases |
| ; the use count of the AND with 0xfffffff8 making TargetShrinkDemandedConstant |
| ; unable to restore it to 0xffffffff for the lshr and -8 for the AND to use |
| ; ANDI. |
| ; Instead we rely on ISel to form srliw even though the AND has multiple uses |
| ; and the mask has missing 1s where bits will be shifted out. This reduces the |
| ; use count of the AND and we can use hasAllWUsers to form ANDI. |
| ; The checks expect andi (immediate -8) and srliw (shift 3) to each read the |
| ; original a0 independently; the srliw result is stored and the andi result |
| ; feeds the addw that produces the return value. |
| define signext i32 @andi_srliw(i32 signext %0, ptr %1, i32 signext %2) { |
| ; CHECK-LABEL: andi_srliw: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andi a3, a0, -8 |
| ; CHECK-NEXT: srliw a4, a0, 3 |
| ; CHECK-NEXT: addw a0, a3, a2 |
| ; CHECK-NEXT: sw a4, 0(a1) |
| ; CHECK-NEXT: ret |
| %4 = and i32 %0, -8 |
| %5 = lshr i32 %0, 3 |
| store i32 %5, ptr %1, align 4 |
| %6 = add i32 %4, %2 |
| ret i32 %6 |
| } |
| |
| ; Computes (%x & 0xff00) | 0xff (65280 = 0xff00). The checks expect the OR to |
| ; be performed first as ori with immediate 255, followed by a slli/srli pair |
| ; by 48 that keeps only the low 16 bits, rather than materializing the 0xff00 |
| ; mask in a register. |
| define i32 @and_or(i32 signext %x) { |
| ; CHECK-LABEL: and_or: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: ori a0, a0, 255 |
| ; CHECK-NEXT: slli a0, a0, 48 |
| ; CHECK-NEXT: srli a0, a0, 48 |
| ; CHECK-NEXT: ret |
| entry: |
| %and = and i32 %x, 65280 |
| %or = or i32 %and, 255 |
| ret i32 %or |
| } |
| |
| ; The shift amount is a zero-extended i32 argument minus one. The checks expect |
| ; no instruction for the zext: a0 is decremented with addi and then used |
| ; directly as the shift amount of sll on the constant 1. |
| ; NOTE(review): the name suggests the zext is treated as an AND with an |
| ; all-ones 32-bit mask that gets dropped -- confirm against the originating |
| ; commit. |
| define i64 @and_allones(i32 signext %x) { |
| ; CHECK-LABEL: and_allones: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: addi a0, a0, -1 |
| ; CHECK-NEXT: li a1, 1 |
| ; CHECK-NEXT: sll a0, a1, a0 |
| ; CHECK-NEXT: ret |
| entry: |
| %y = zext i32 %x to i64 |
| %shamt = add nsw i64 %y, -1 |
| %ret = shl i64 1, %shamt |
| ret i64 %ret |
| } |