| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s |
| |
; Packs three converted float inputs into one 32-bit word using and/or with
; constant masks, then returns the word reinterpreted as a float.
;   bits  0-9 and 30-31 : fptoui(%z * 1023.0), mask 0xC00003FF
;   bits 10-19          : fptoui(%y) << 10,    mask 0x000FFC00
;   bits 20-29          : fptoui(%x * 1023.0) << 20, mask 0x3FF00000
; The three masks are pairwise disjoint and together cover all 32 bits.
; NOTE(review): judging by the function name this is meant to exercise nested
; bitfield-insert matching; the recorded output below consists of
; v_and_b32 / v_or_b32 instructions -- confirm that is the intended result.
| define float @v_bfi_single_nesting_level(float %x, float %y, float %z) { |
| ; GCN-LABEL: v_bfi_single_nesting_level: |
| ; GCN: ; %bb.0: ; %.entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_mul_f32_e32 v2, 0x447fc000, v2 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GCN-NEXT: v_and_b32_e32 v1, 0xffc00, v1 |
| ; GCN-NEXT: v_and_b32_e32 v2, 0xc00003ff, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 20, v0 |
| ; GCN-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; GCN-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0 |
| ; GCN-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| .entry: |
; Base value: %z scaled by 1023.0 and converted to an unsigned integer.
| %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03 |
| %mul.base.i32 = fptoui float %mul.base to i32 |
; Inner insert: %y placed at bits 10-19 (1047552 == 0xFFC00); the base keeps
; bits 0-9 and 30-31 (-1073740801 == 0xC00003FF).
| %y.i32 = fptoui float %y to i32 |
| %shl.inner.insert = shl i32 %y.i32, 10 |
| %bfi1.and = and i32 %shl.inner.insert, 1047552 |
| %bfi1.andnot = and i32 %mul.base.i32, -1073740801 |
| %bfi1.or = or i32 %bfi1.and, %bfi1.andnot |
; Outer insert: scaled %x placed at bits 20-29 (1072693248 == 0x3FF00000).
| %mul.outer.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03 |
| %mul.outer.insert.i32 = fptoui float %mul.outer.insert to i32 |
| %shl.outer.insert = shl i32 %mul.outer.insert.i32, 20 |
| %and.outer = and i32 %shl.outer.insert, 1072693248 |
| %or.outer = or i32 %bfi1.or, %and.outer |
| %result = bitcast i32 %or.outer to float |
| ret float %result |
| } |
| |
; Same three-field packing as a single-level nested insert (masks 0xC00003FF,
; 0xFFC00 and 0x3FF00000), but with commuted operands: the mask constants are
; the first operand of the two inner `and` instructions, and the final `or`
; takes the outer field first and the inner result second.
; In the recorded output this shows up as the operand order of the last
; v_or_b32 (v0, v0, v1).
| define float @v_bfi_single_nesting_level_swapped_operands(float %x, float %y, float %z) { |
| ; GCN-LABEL: v_bfi_single_nesting_level_swapped_operands: |
| ; GCN: ; %bb.0: ; %.entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_mul_f32_e32 v2, 0x447fc000, v2 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GCN-NEXT: v_and_b32_e32 v1, 0xffc00, v1 |
| ; GCN-NEXT: v_and_b32_e32 v2, 0xc00003ff, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 20, v0 |
| ; GCN-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; GCN-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0 |
| ; GCN-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| .entry: |
; Base value: %z scaled by 1023.0 and converted to an unsigned integer.
| %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03 |
| %mul.base.i32 = fptoui float %mul.base to i32 |
| %y.i32 = fptoui float %y to i32 |
| %shl.inner.insert = shl i32 %y.i32, 10 |
; Constant-first operand order on both masking operations
; (1047552 == 0xFFC00, -1073740801 == 0xC00003FF).
| %bfi1.and = and i32 1047552, %shl.inner.insert |
| %bfi1.andnot = and i32 -1073740801, %mul.base.i32 |
| %bfi1.or = or i32 %bfi1.and, %bfi1.andnot |
; Outer field: scaled %x at bits 20-29 (1072693248 == 0x3FF00000).
| %mul.outer.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03 |
| %mul.outer.insert.i32 = fptoui float %mul.outer.insert to i32 |
| %shl.outer.insert = shl i32 %mul.outer.insert.i32, 20 |
| %and.outer = and i32 %shl.outer.insert, 1072693248 |
; Outer `or` with the masked outer field as the first operand.
| %or.outer = or i32 %and.outer, %bfi1.or |
| %result = bitcast i32 %or.outer to float |
| ret float %result |
| } |
| |
; Packs three converted float inputs into one 32-bit word where the base value
; contributes through two separate masks that sit at different depths of the
; `or` tree:
;   bits  5-9           : base & 0x000003E0   (joined with the %y field first)
;   bits  0-4 and 30-31 : base & 0xC000001F   (joined one level further out)
;   bits 10-19          : fptoui(%y) << 10,          mask 0x000FFC00
;   bits 20-29          : fptoui(%x * 1023.0) << 20, mask 0x3FF00000
; The four masks are pairwise disjoint and together cover all 32 bits.
; The recorded output keeps both base masks as separate v_and_b32
; instructions (0x3e0 and 0xc000001f).
| define float @v_bfi_single_nesting_level_unbalanced_subtree(float %x, float %y, float %z) { |
| ; GCN-LABEL: v_bfi_single_nesting_level_unbalanced_subtree: |
| ; GCN: ; %bb.0: ; %.entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_mul_f32_e32 v2, 0x447fc000, v2 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GCN-NEXT: v_and_b32_e32 v1, 0xffc00, v1 |
| ; GCN-NEXT: v_and_b32_e32 v3, 0x3e0, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 20, v0 |
| ; GCN-NEXT: v_or_b32_e32 v1, v1, v3 |
| ; GCN-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0 |
| ; GCN-NEXT: v_and_b32_e32 v2, 0xc000001f, v2 |
| ; GCN-NEXT: v_or_b32_e32 v1, v2, v1 |
| ; GCN-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| .entry: |
; Base value: %z scaled by 1023.0 and converted to an unsigned integer.
| %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03 |
| %mul.base.i32 = fptoui float %mul.base to i32 |
; Deepest `or`: %y field (1047552 == 0xFFC00) joined with base bits 5-9
; (992 == 0x3E0).
| %y.i32 = fptoui float %y to i32 |
| %shl.inner.2.insert = shl i32 %y.i32, 10 |
| %bfi.inner.2.and.1 = and i32 %shl.inner.2.insert, 1047552 |
| %bfi.inner.2.and.2 = and i32 %mul.base.i32, 992 |
| %bfi.inner.2 = or i32 %bfi.inner.2.and.1, %bfi.inner.2.and.2 |
; Scaled %x field at bits 20-29 (1072693248 == 0x3FF00000).
| %mul.inner.1.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03 |
| %mul.inner.1.insert.1.i32 = fptoui float %mul.inner.1.insert to i32 |
| %shl.inner.1.insert.1 = shl i32 %mul.inner.1.insert.1.i32, 20 |
| %bfi.inner.1.and.1 = and i32 %shl.inner.1.insert.1, 1072693248 |
; Second use of the base with a different mask (-1073741793 == 0xC000001F),
; combined with the deepest `or` before the %x field is merged in.
| %bfi.inner.1.and.2 = and i32 %mul.base.i32, -1073741793 |
| %bfi.inner.1 = or i32 %bfi.inner.1.and.2, %bfi.inner.2 |
| %bfi.outer = or i32 %bfi.inner.1.and.1, %bfi.inner.1 |
| %result = bitcast i32 %bfi.outer to float |
| ret float %result |
| } |
| |
; Builds the same inner and outer and/or combination as the single-level case,
; but the returned value is derived only from the inner `or` (%bfi1.or * 2).
; %or.outer is computed in the IR and never used, so the whole %x path is dead.
; The recorded output accordingly has no instructions operating on %x: a single
; multiply on v2, the two masks, one v_or_b32 and a shift left by 1.
; NOTE(review): the recorded base mask is 0x400003ff rather than 0xc00003ff;
; presumably bit 31 is dropped because the following shift discards it --
; confirm.
| define float @v_bfi_single_nesting_level_inner_use(float %x, float %y, float %z) { |
| ; GCN-LABEL: v_bfi_single_nesting_level_inner_use: |
| ; GCN: ; %bb.0: ; %.entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x447fc000, v2 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GCN-NEXT: v_and_b32_e32 v1, 0xffc00, v1 |
| ; GCN-NEXT: v_and_b32_e32 v0, 0x400003ff, v0 |
| ; GCN-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| .entry: |
; Base value: %z scaled by 1023.0 and converted to an unsigned integer.
| %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03 |
| %mul.base.i32 = fptoui float %mul.base to i32 |
; Inner combination: %y at bits 10-19 (0xFFC00), base masked with 0xC00003FF.
| %y.i32 = fptoui float %y to i32 |
| %shl.inner.insert = shl i32 %y.i32, 10 |
| %bfi1.and = and i32 %shl.inner.insert, 1047552 |
| %bfi1.andnot = and i32 %mul.base.i32, -1073740801 |
| %bfi1.or = or i32 %bfi1.and, %bfi1.andnot |
; Outer combination: present in the IR but its result (%or.outer) has no users.
| %mul.outer.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03 |
| %mul.outer.insert.i32 = fptoui float %mul.outer.insert to i32 |
| %shl.outer.insert = shl i32 %mul.outer.insert.i32, 20 |
| %and.outer = and i32 %shl.outer.insert, 1072693248 |
| %or.outer = or i32 %bfi1.or, %and.outer |
; Additional use of the inner `or`; this is the only value that reaches `ret`.
| %bfi1.or.seconduse = mul i32 %bfi1.or, 2 |
| %result = bitcast i32 %bfi1.or.seconduse to float |
| ret float %result |
| } |
| |
; Same instruction shape as the single-level nested case, but the base mask is
; 0xC0000400 (-1073740800) instead of 0xC00003FF. That mask shares bit 10 with
; the %y field mask 0xFFC00 and leaves bits 0-9 uncovered, so the constants no
; longer split the 32-bit word into disjoint fields.
; The recorded output is the plain v_and_b32 / v_or_b32 sequence with the
; masks 0xffc00, 0xc0000400 and 0x3ff00000.
| define float @v_bfi_no_nesting(float %x, float %y, float %z) { |
| ; GCN-LABEL: v_bfi_no_nesting: |
| ; GCN: ; %bb.0: ; %.entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_mul_f32_e32 v2, 0x447fc000, v2 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GCN-NEXT: v_and_b32_e32 v1, 0xffc00, v1 |
| ; GCN-NEXT: v_and_b32_e32 v2, 0xc0000400, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 20, v0 |
| ; GCN-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; GCN-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0 |
| ; GCN-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| .entry: |
; Base value: %z scaled by 1023.0 and converted to an unsigned integer.
| %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03 |
| %mul.base.i32 = fptoui float %mul.base to i32 |
| %y.i32 = fptoui float %y to i32 |
| %shl.inner.insert = shl i32 %y.i32, 10 |
; 1047552 == 0xFFC00 (bits 10-19).
| %inner.and = and i32 %shl.inner.insert, 1047552 |
; -1073740800 == 0xC0000400: overlaps the mask above at bit 10.
| %inner.and2 = and i32 %mul.base.i32, -1073740800 |
| %inner.or = or i32 %inner.and, %inner.and2 |
; Outer field: scaled %x at bits 20-29 (1072693248 == 0x3FF00000).
| %mul.outer.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03 |
| %mul.outer.insert.i32 = fptoui float %mul.outer.insert to i32 |
| %shl.outer.insert = shl i32 %mul.outer.insert.i32, 20 |
| %and.outer = and i32 %shl.outer.insert, 1072693248 |
| %or.outer = or i32 %inner.or, %and.outer |
| %result = bitcast i32 %or.outer to float |
| ret float %result |
| } |
| |
; Packs four fields into one 32-bit word through a chain of three `or`
; instructions, then returns the word reinterpreted as a float.
;   bits  0-4 and 30-31 : fptoui(%z),                mask 0xC000001F
;   bits  5-9           : fptoui(%y) << 5,           mask 0x000003E0
;   bits 10-19          : fptoui(%y) << 10,          mask 0x000FFC00
;   bits 20-29          : fptoui(%x * 1023.0) << 20, mask 0x3FF00000
; The four masks are pairwise disjoint and together cover all 32 bits.
; Unlike the single-level tests, %z is converted without the 1023.0 scaling,
; and the converted %y feeds two different fields.
| define float @v_bfi_two_levels(float %x, float %y, float %z) { |
| ; GCN-LABEL: v_bfi_two_levels: |
| ; GCN: ; %bb.0: ; %.entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v3, 5, v1 |
| ; GCN-NEXT: v_and_b32_e32 v2, 0xc000001f, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GCN-NEXT: v_and_b32_e32 v3, 0x3e0, v3 |
| ; GCN-NEXT: v_and_b32_e32 v1, 0xffc00, v1 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 20, v0 |
| ; GCN-NEXT: v_or_b32_e32 v2, v3, v2 |
| ; GCN-NEXT: v_or_b32_e32 v1, v2, v1 |
| ; GCN-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0 |
| ; GCN-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| .entry: |
; Innermost level: %y at bits 5-9 (992 == 0x3E0) over the masked %z base
; (-1073741793 == 0xC000001F).
| %y.i32 = fptoui float %y to i32 |
| %shl.insert.inner = shl i32 %y.i32, 5 |
| %and.insert.inner = and i32 %shl.insert.inner, 992 |
| %z.i32 = fptoui float %z to i32 |
| %base.inner = and i32 %z.i32, -1073741793 |
| %or.inner = or i32 %and.insert.inner , %base.inner |
; Middle level: %y again, this time at bits 10-19 (1047552 == 0xFFC00).
| %shl.insert.mid = shl i32 %y.i32, 10 |
| %and.insert.mid = and i32 %shl.insert.mid, 1047552 |
| %or.mid = or i32 %or.inner, %and.insert.mid |
; Outermost level: scaled %x at bits 20-29 (1072693248 == 0x3FF00000).
| %fmul.insert.outer = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03 |
| %cast.insert.outer = fptoui float %fmul.insert.outer to i32 |
| %shl.insert.outer = shl i32 %cast.insert.outer, 20 |
| %and.insert.outer = and i32 %shl.insert.outer, 1072693248 |
| %or.outer = or i32 %or.mid, %and.insert.outer |
| %result = bitcast i32 %or.outer to float |
| ret float %result |
| } |
| |
; Same four-field, three-level packing as the two-level test (masks
; 0xC000001F, 0x3E0, 0xFFC00, 0x3FF00000), except that the innermost `or`
; (%or.inner) has a second user: it is bitcast to float and multiplied with the
; fully packed result. The product is the return value.
; In the recorded output the innermost combination stays live in v2 and feeds
; the final v_mul_f32.
| define float @v_bfi_two_levels_inner_or_multiple_uses(float %x, float %y, float %z) { |
| ; GCN-LABEL: v_bfi_two_levels_inner_or_multiple_uses: |
| ; GCN: ; %bb.0: ; %.entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v3, 5, v1 |
| ; GCN-NEXT: v_and_b32_e32 v2, 0xc000001f, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GCN-NEXT: v_and_b32_e32 v3, 0x3e0, v3 |
| ; GCN-NEXT: v_and_b32_e32 v1, 0xffc00, v1 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 20, v0 |
| ; GCN-NEXT: v_or_b32_e32 v2, v3, v2 |
| ; GCN-NEXT: v_or_b32_e32 v1, v2, v1 |
| ; GCN-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0 |
| ; GCN-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GCN-NEXT: v_mul_f32_e32 v0, v0, v2 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| .entry: |
; Innermost level: %y at bits 5-9 (992 == 0x3E0) over the masked %z base
; (-1073741793 == 0xC000001F).
| %y.i32 = fptoui float %y to i32 |
| %shl.insert.inner = shl i32 %y.i32, 5 |
| %and.insert.inner = and i32 %shl.insert.inner, 992 |
| %z.i32 = fptoui float %z to i32 |
| %base.inner = and i32 %z.i32, -1073741793 |
| %or.inner = or i32 %and.insert.inner , %base.inner |
; Middle level: %y at bits 10-19 (1047552 == 0xFFC00).
| %shl.insert.mid = shl i32 %y.i32, 10 |
| %and.insert.mid = and i32 %shl.insert.mid, 1047552 |
| %or.mid = or i32 %or.inner, %and.insert.mid |
; Outermost level: scaled %x at bits 20-29 (1072693248 == 0x3FF00000).
| %fmul.insert.outer = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03 |
| %cast.insert.outer = fptoui float %fmul.insert.outer to i32 |
| %shl.insert.outer = shl i32 %cast.insert.outer, 20 |
| %and.insert.outer = and i32 %shl.insert.outer, 1072693248 |
| %or.outer = or i32 %or.mid, %and.insert.outer |
| %result = bitcast i32 %or.outer to float |
; Second use of the innermost `or`, outside the packing chain.
| %or.inner.float = bitcast i32 %or.inner to float |
| %result2 = fmul float %result, %or.inner.float |
| ret float %result2 |
| } |
| |
; Variant with no field masks: the shifted %y value and the base are joined
; with a bare `or`, and the only `and` in the function uses the all-ones
; constant -1, which leaves its operand unchanged.
; The recorded output contains no v_and_b32 at all -- only the conversions,
; the two shifts and two v_or_b32 instructions.
| define float @v_bfi_single_constant_as_partition(float %x, float %y, float %z) { |
| ; GCN-LABEL: v_bfi_single_constant_as_partition: |
| ; GCN: ; %bb.0: ; %.entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_mul_f32_e32 v2, 0x447fc000, v2 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GCN-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 20, v0 |
| ; GCN-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| .entry: |
; Base value: %z scaled by 1023.0 and converted to an unsigned integer.
| %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03 |
| %mul.base.i32 = fptoui float %mul.base to i32 |
; Unmasked inner combination: (fptoui(%y) << 10) | base.
| %y.i32 = fptoui float %y to i32 |
| %shl.inner.insert = shl i32 %y.i32, 10 |
| %bfi1.or = or i32 %shl.inner.insert, %mul.base.i32 |
| %mul.outer.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03 |
| %mul.outer.insert.i32 = fptoui float %mul.outer.insert to i32 |
| %shl.outer.insert = shl i32 %mul.outer.insert.i32, 20 |
; The single mask constant in this function is -1 (every bit set).
| %and.outer = and i32 %shl.outer.insert, -1 |
| %or.outer = or i32 %bfi1.or, %and.outer |
| %result = bitcast i32 %or.outer to float |
| ret float %result |
| } |
| |
; Kernel that assembles a 32-bit word from two 16-bit halves and stores it to
; %out: lane 0 of a <2 x i16> is %a, lane 1 is the upper 16 bits of %b
; (lshr by 16, then trunc), and the vector is bitcast to i32 before the store.
; The recorded output performs the combination with scalar instructions
; (s_and_b32 with 0xffff0000 and 0xffff, then s_or_b32) and moves the result
; into a VGPR only for the store.
; The base vector is `poison` rather than the deprecated `undef`; both lanes
; are overwritten before the vector is read, so the stored value is unaffected.
| define amdgpu_kernel void @v_bfi_dont_applied_for_scalar_ops(ptr addrspace(1) %out, i16 %a, i32 %b) { |
| ; GCN-LABEL: v_bfi_dont_applied_for_scalar_ops: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_and_b32 s3, s3, 0xffff0000 |
| ; GCN-NEXT: s_and_b32 s2, s2, 0xffff |
| ; GCN-NEXT: s_or_b32 s2, s2, s3 |
| ; GCN-NEXT: s_mov_b32 s6, -1 |
| ; GCN-NEXT: s_mov_b32 s4, s0 |
| ; GCN-NEXT: s_mov_b32 s5, s1 |
| ; GCN-NEXT: v_mov_b32_e32 v0, s2 |
| ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GCN-NEXT: s_endpgm |
; High half: upper 16 bits of %b.
| %shift = lshr i32 %b, 16 |
| %tr = trunc i32 %shift to i16 |
; Build the <2 x i16>: lane 0 = %a, lane 1 = high half of %b.
| %tmp = insertelement <2 x i16> poison, i16 %a, i32 0 |
| %vec = insertelement <2 x i16> %tmp, i16 %tr, i32 1 |
| %val = bitcast <2 x i16> %vec to i32 |
| store i32 %val, ptr addrspace(1) %out, align 4 |
| ret void |
| } |