| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=riscv32 -global-isel -mattr=+experimental-p,+zbb -verify-machineinstrs \ |
| ; RUN: < %s | FileCheck %s |
| |
| ; Count-leading-sign-bits idiom on i8: ctlz(x ^ (x >>s 7)) - 1. |
| ; The expected output sign-extends the byte, uses a single cls, and then |
| ; subtracts 24 from the result. |
| define i8 @cls_i8(i8 %x) { |
| ; CHECK-LABEL: cls_i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: sext.b a0, a0 |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: addi a0, a0, -24 |
| ; CHECK-NEXT: ret |
| ; %a is the sign bit of %x replicated into every bit (0 or -1). |
| %a = ashr i8 %x, 7 |
| ; Inverts %x when it is negative, so leading sign bits become leading zeros. |
| %b = xor i8 %x, %a |
| ; The second operand is false, so a zero input (%x == 0 or %x == -1) is defined. |
| %c = call i8 @llvm.ctlz.i8(i8 %b, i1 false) |
| ; Exclude the sign bit itself from the count. |
| %d = sub i8 %c, 1 |
| ret i8 %d |
| } |
| |
| ; Alternative i8 idiom: ctlz(((x ^ (x >>s 7)) << 1) | 1) with the zero-is-poison |
| ; flag set. The expected output is the same three-instruction sequence as for |
| ; @cls_i8. |
| define i8 @cls_i8_2(i8 %x) { |
| ; CHECK-LABEL: cls_i8_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: sext.b a0, a0 |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: addi a0, a0, -24 |
| ; CHECK-NEXT: ret |
| ; %a is the sign bit of %x replicated into every bit (0 or -1). |
| %a = ashr i8 %x, 7 |
| ; Inverts %x when it is negative, so leading sign bits become leading zeros. |
| %b = xor i8 %x, %a |
| ; Shifting left by one drops the leading zero that stands for the sign bit. |
| %c = shl i8 %b, 1 |
| ; Setting bit 0 makes the ctlz operand non-zero, so the i1 true flag below is safe. |
| %d = or i8 %c, 1 |
| %e = call i8 @llvm.ctlz.i8(i8 %d, i1 true) |
| ret i8 %e |
| } |
| |
| ; Count-leading-sign-bits idiom on i16: ctlz(x ^ (x >>s 15)) - 1. |
| ; The expected output sign-extends the halfword, uses a single cls, and then |
| ; subtracts 16 from the result. |
| define i16 @cls_i16(i16 %x) { |
| ; CHECK-LABEL: cls_i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: sext.h a0, a0 |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: addi a0, a0, -16 |
| ; CHECK-NEXT: ret |
| ; %a is the sign bit of %x replicated into every bit (0 or -1). |
| %a = ashr i16 %x, 15 |
| ; Inverts %x when it is negative, so leading sign bits become leading zeros. |
| %b = xor i16 %x, %a |
| ; The second operand is false, so a zero input (%x == 0 or %x == -1) is defined. |
| %c = call i16 @llvm.ctlz.i16(i16 %b, i1 false) |
| ; Exclude the sign bit itself from the count. |
| %d = sub i16 %c, 1 |
| ret i16 %d |
| } |
| |
| ; Alternative i16 idiom: ctlz(((x ^ (x >>s 15)) << 1) | 1) with the |
| ; zero-is-poison flag set. The expected output is the same three-instruction |
| ; sequence as for @cls_i16. |
| define i16 @cls_i16_2(i16 %x) { |
| ; CHECK-LABEL: cls_i16_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: sext.h a0, a0 |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: addi a0, a0, -16 |
| ; CHECK-NEXT: ret |
| ; %a is the sign bit of %x replicated into every bit (0 or -1). |
| %a = ashr i16 %x, 15 |
| ; Inverts %x when it is negative, so leading sign bits become leading zeros. |
| %b = xor i16 %x, %a |
| ; Shifting left by one drops the leading zero that stands for the sign bit. |
| %c = shl i16 %b, 1 |
| ; Setting bit 0 makes the ctlz operand non-zero, so the i1 true flag below is safe. |
| %d = or i16 %c, 1 |
| %e = call i16 @llvm.ctlz.i16(i16 %d, i1 true) |
| ret i16 %e |
| } |
| |
| ; Count-leading-sign-bits idiom on i32: ctlz(x ^ (x >>s 31)) - 1. |
| ; The expected output is a single cls with no extension or adjustment. |
| define i32 @cls_i32(i32 %x) { |
| ; CHECK-LABEL: cls_i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: ret |
| ; %a is the sign bit of %x replicated into every bit (0 or -1). |
| %a = ashr i32 %x, 31 |
| ; Inverts %x when it is negative, so leading sign bits become leading zeros. |
| %b = xor i32 %x, %a |
| ; The second operand is false, so a zero input (%x == 0 or %x == -1) is defined. |
| %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false) |
| ; Exclude the sign bit itself from the count. |
| %d = sub i32 %c, 1 |
| ret i32 %d |
| } |
| |
| ; Alternative i32 idiom: ctlz(((x ^ (x >>s 31)) << 1) | 1) with the |
| ; zero-is-poison flag set. The expected output is a single cls, as for @cls_i32. |
| define i32 @cls_i32_2(i32 %x) { |
| ; CHECK-LABEL: cls_i32_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: ret |
| ; %a is the sign bit of %x replicated into every bit (0 or -1). |
| %a = ashr i32 %x, 31 |
| ; Inverts %x when it is negative, so leading sign bits become leading zeros. |
| %b = xor i32 %x, %a |
| ; Shifting left by one drops the leading zero that stands for the sign bit. |
| %c = shl i32 %b, 1 |
| ; Setting bit 0 makes the ctlz operand non-zero, so the i1 true flag below is safe. |
| %d = or i32 %c, 1 |
| %e = call i32 @llvm.ctlz.i32(i32 %d, i1 true) |
| ret i32 %e |
| } |
| |
| ; Count-leading-sign-bits idiom on i64: ctlz(x ^ (x >>s 63)) - 1. |
| ; On this riscv32 run the expected output contains no cls: it is a branch |
| ; around two clz paths followed by a two-register subtract of 1. |
| define i64 @cls_i64(i64 %x) { |
| ; CHECK-LABEL: cls_i64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srai a2, a1, 31 |
| ; CHECK-NEXT: xor a1, a1, a2 |
| ; CHECK-NEXT: beqz a1, .LBB6_2 |
| ; CHECK-NEXT: # %bb.1: |
| ; CHECK-NEXT: clz a0, a1 |
| ; CHECK-NEXT: j .LBB6_3 |
| ; CHECK-NEXT: .LBB6_2: |
| ; CHECK-NEXT: xor a0, a0, a2 |
| ; CHECK-NEXT: clz a0, a0 |
| ; CHECK-NEXT: addi a0, a0, 32 |
| ; CHECK-NEXT: .LBB6_3: |
| ; CHECK-NEXT: addi a0, a0, -1 |
| ; CHECK-NEXT: sltiu a1, a0, -1 |
| ; CHECK-NEXT: li a2, -1 |
| ; CHECK-NEXT: add a1, a2, a1 |
| ; CHECK-NEXT: ret |
| ; %a is the sign bit of %x replicated into every bit (0 or -1). |
| %a = ashr i64 %x, 63 |
| ; Inverts %x when it is negative, so leading sign bits become leading zeros. |
| %b = xor i64 %x, %a |
| ; The second operand is false, so a zero input (%x == 0 or %x == -1) is defined. |
| %c = call i64 @llvm.ctlz.i64(i64 %b, i1 false) |
| ; Exclude the sign bit itself from the count. |
| %d = sub i64 %c, 1 |
| ret i64 %d |
| } |
| |
| ; Alternative i64 idiom: ctlz(((x ^ (x >>s 63)) << 1) | 1) with the |
| ; zero-is-poison flag set. On this riscv32 run the expected output contains no |
| ; cls: it is a branch between two clz paths, each returning 0 in a1. |
| define i64 @cls_i64_2(i64 %x) { |
| ; CHECK-LABEL: cls_i64_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srai a2, a1, 31 |
| ; CHECK-NEXT: xor a0, a0, a2 |
| ; CHECK-NEXT: xor a1, a1, a2 |
| ; CHECK-NEXT: slli a1, a1, 1 |
| ; CHECK-NEXT: srli a2, a0, 31 |
| ; CHECK-NEXT: or a1, a1, a2 |
| ; CHECK-NEXT: beqz a1, .LBB7_2 |
| ; CHECK-NEXT: # %bb.1: |
| ; CHECK-NEXT: clz a0, a1 |
| ; CHECK-NEXT: li a1, 0 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB7_2: |
| ; CHECK-NEXT: slli a0, a0, 1 |
| ; CHECK-NEXT: ori a0, a0, 1 |
| ; CHECK-NEXT: clz a0, a0 |
| ; CHECK-NEXT: addi a0, a0, 32 |
| ; CHECK-NEXT: li a1, 0 |
| ; CHECK-NEXT: ret |
| ; %a is the sign bit of %x replicated into every bit (0 or -1). |
| %a = ashr i64 %x, 63 |
| ; Inverts %x when it is negative, so leading sign bits become leading zeros. |
| %b = xor i64 %x, %a |
| ; Shifting left by one drops the leading zero that stands for the sign bit. |
| %c = shl i64 %b, 1 |
| ; Setting bit 0 makes the ctlz operand non-zero, so the i1 true flag below is safe. |
| %d = or i64 %c, 1 |
| %e = call i64 @llvm.ctlz.i64(i64 %d, i1 true) |
| ret i64 %e |
| } |
| |
| ; The result is in the range [0, 31], so we don't need an andi after the cls. |
| define i32 @cls_i32_knownbits(i32 %x) { |
| ; CHECK-LABEL: cls_i32_knownbits: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: ret |
| ; Same ctlz(x ^ (x >>s 31)) - 1 pattern as @cls_i32. |
| %a = ashr i32 %x, 31 |
| %b = xor i32 %x, %a |
| %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false) |
| %d = sub i32 %c, 1 |
| ; Bit 31 of %b is always clear, so %c is in [1, 32] and %d never exceeds 31. |
| ; This mask therefore changes nothing; the expected output has no andi. |
| %e = and i32 %d, 31 |
| ret i32 %e |
| } |
| |
| ; There are at least 16 redundant sign bits so we don't need an ori after the cls. |
| define i32 @cls_i32_knownbits_2(i16 signext %x) { |
| ; CHECK-LABEL: cls_i32_knownbits_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: ret |
| ; Sign-extending from i16 makes bits 31..15 of %sext identical. |
| %sext = sext i16 %x to i32 |
| ; Same ctlz(v ^ (v >>s 31)) - 1 pattern as @cls_i32, applied to %sext. |
| %a = ashr i32 %sext, 31 |
| %b = xor i32 %sext, %a |
| %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false) |
| %d = sub i32 %c, 1 |
| ; %d is in [16, 31], so bit 4 is already set and this or changes nothing; |
| ; the expected output has no ori. |
| %e = or i32 %d, 16 |
| ret i32 %e |
| } |
| |
| ; There are at least 24 redundant sign bits so we don't need an ori after the cls. |
| define i32 @cls_i32_knownbits_3(i8 signext %x) { |
| ; CHECK-LABEL: cls_i32_knownbits_3: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: ret |
| ; Sign-extending from i8 makes bits 31..7 of %sext identical. |
| %sext = sext i8 %x to i32 |
| ; Same ctlz(v ^ (v >>s 31)) - 1 pattern as @cls_i32, applied to %sext. |
| %a = ashr i32 %sext, 31 |
| %b = xor i32 %sext, %a |
| %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false) |
| %d = sub i32 %c, 1 |
| ; %d is in [24, 31], so bits 3 and 4 are already set and this or changes |
| ; nothing; the expected output has no ori. |
| %e = or i32 %d, 24 |
| ret i32 %e |
| } |
| |
| ; Negative test. We only know there is at least 1 redundant sign bit. We can't |
| ; remove the ori. |
| define i32 @cls_i32_knownbits_4(i32 signext %x) { |
| ; CHECK-LABEL: cls_i32_knownbits_4: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: slli a0, a0, 1 |
| ; CHECK-NEXT: srai a0, a0, 1 |
| ; CHECK-NEXT: cls a0, a0 |
| ; CHECK-NEXT: ori a0, a0, 1 |
| ; CHECK-NEXT: ret |
| ; The shl/ashr pair copies bit 30 into bit 31, so the top two bits of %ashr |
| ; are equal. |
| %shl = shl i32 %x, 1 |
| %ashr = ashr i32 %shl, 1 |
| ; Same ctlz(v ^ (v >>s 31)) - 1 pattern as @cls_i32, applied to %ashr. |
| %a = ashr i32 %ashr, 31 |
| %b = xor i32 %ashr, %a |
| %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false) |
| %d = sub i32 %c, 1 |
| ; %d is at least 1, but bit 0 is not known to be set (for example %d can be 2), |
| ; so this or must remain; the expected output keeps the ori. |
| %e = or i32 %d, 1 |
| ret i32 %e |
| } |
| |
| ; Negative test. Check that the number of sign bits is not |
| ; overestimated. If it is, the ori disappears. |
| define i32 @cls_i32_knownbits_no_overestimate(i32 signext %x) { |
| ; CHECK-LABEL: cls_i32_knownbits_no_overestimate: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srai a1, a0, 15 |
| ; CHECK-NEXT: srai a0, a0, 31 |
| ; CHECK-NEXT: xor a0, a1, a0 |
| ; CHECK-NEXT: clz a0, a0 |
| ; CHECK-NEXT: addi a0, a0, -1 |
| ; CHECK-NEXT: ori a0, a0, 16 |
| ; CHECK-NEXT: ret |
| ; An arithmetic shift right by 15 makes bits 31..16 of %ashr identical: |
| ; 16 sign bits, of which only 15 are redundant. |
| %ashr = ashr i32 %x, 15 |
| ; Same ctlz(v ^ (v >>s 31)) - 1 pattern as @cls_i32, applied to %ashr. |
| %a = ashr i32 %ashr, 31 |
| %b = xor i32 %ashr, %a |
| %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false) |
| %d = sub i32 %c, 1 |
| ; %d can be as small as 15, which has bit 4 clear, so this or must remain; |
| ; the expected output keeps the ori. |
| %e = or i32 %d, 16 |
| ret i32 %e |
| } |