// Assembler test for gfx1100 instructions that use dpp8 and dpp modifiers, checked in wave32 and wave64 modes.
; Four invocations of llvm-mc for gfx1100, each wrapped in 'not':
;  - two use -show-encoding, one with only wavefrontsize32 enabled and one with
;    only wavefrontsize64 enabled, and match the printed encodings;
;  - two redirect stderr into the pipe (2>&1) for the same two configurations
;    and match the expected diagnostics. The --implicit-check-not option makes
;    FileCheck reject any diagnostic that is not matched by a check line.
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
; DPP8
; Every instruction in this part selects lanes with a dpp8:[...] list; some of
; them also add fi:1. In the expected bytes of this group, the fifth byte is
; 0xe9 without fi:1 and 0xea with fi:1.
; VOP1->3
; One-source mnemonics written with the _e64_dpp suffix; the first one also
; carries the div:2 output modifier.
v_cvt_f16_f32_e64_dpp v5, v1 div:2 dpp8:[0,2,1,3,4,5,6,7]
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x18,0x01,0x50,0xc6,0xfa]
v_ffbh_i32_e64_dpp v5, v1 dpp8:[0,0,2,3,4,4,6,7]
// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x80,0x46,0xfa]
v_ctz_i32_b32_e64_dpp v199, v1 dpp8:[0,0,2,3,4,4,6,7] fi:1
// GFX11: encoding: [0xc7,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x80,0x46,0xfa]
; VOP2->3
; Two-source mnemonics written with the _e64_dpp suffix.
v_xnor_b32_e64_dpp v8, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: encoding: [0x08,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
v_add_nc_u32_e64_dpp v60, v61, v62 dpp8:[7,6,5,3,4,2,1,0] fi:1
// GFX11: encoding: [0x3c,0x00,0x25,0xd5,0xea,0x7c,0x02,0x00,0x3d,0x77,0x47,0x05]
v_mul_f32_e64_dpp v0, v1, v2 dpp8:[0,1,2,3,1,5,6,7]
// GFX11: encoding: [0x00,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x88,0x96,0xfa]
; v_cndmask_b32 takes the SGPR pair s[4:5] as its last operand here: the wave64
; encoding run expects bytes, while the wave32 diagnostic run expects a
; diagnostic on the instruction's own line (two lines above its check).
v_cndmask_b32_e64_dpp v202, v1, v2, s[4:5] dpp8:[7,1,7,2,7,3,7,4]
// W64: encoding: [0xca,0x00,0x01,0xd5,0xe9,0x04,0x12,0x00,0x01,0xcf,0xf5,0x9d]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
; VOP2B -> VOP3B
; These forms take a scalar destination and a scalar last source in addition to
; the vector operands. With single 32-bit SGPRs (s2, s1) the wave32 encoding run
; expects bytes and the wave64 diagnostic run expects a diagnostic; with 64-bit
; scalar operands (s[2:3], vcc) it is the other way round. Each diagnostic check
; refers to the instruction two lines above it, so nothing may be inserted
; between an instruction and its checks.
v_add_co_ci_u32_e64_dpp v0, s2, v1, v2, s1 clamp dpp8:[7,6,5,3,4,2,1,0] fi:1
// W32: encoding: [0x00,0x82,0x20,0xd5,0xea,0x04,0x06,0x00,0x01,0x77,0x47,0x05]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_add_co_ci_u32_e64_dpp v0, s[2:3], v1, v2, s[2:3] clamp dpp8:[7,6,5,3,4,2,1,0] fi:1
// W64: encoding: [0x00,0x82,0x20,0xd5,0xea,0x04,0x0a,0x00,0x01,0x77,0x47,0x05]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_sub_co_ci_u32_e64_dpp v0, vcc, v1, v2, vcc clamp dpp8:[7,6,5,3,4,2,1,0]
// W64: encoding: [0x00,0xea,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x47,0x05]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_sub_co_ci_u32_e64_dpp v0, vcc, v1, v2, s[2:3] dpp8:[7,6,5,3,4,2,1,0] fi:1
// W64: encoding: [0x00,0x6a,0x21,0xd5,0xea,0x04,0x0a,0x00,0x01,0x77,0x47,0x05]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
; VOP3
; Three-source _e64_dpp forms with a dpp8 selector. The cases cover the clamp
; and div:2 modifiers, negated (-v) and abs() sources, and a last source that is
; a VGPR, an SGPR (s3, s4) or the constant -1. All of them are checked under the
; common prefix, so the same bytes are expected in both wave modes.
v_bfe_u32_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7]
// GFX11: encoding: [0x00,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa]
v_maxmin_f32_e64_dpp v0, v1, v2, v3 clamp dpp8:[0,1,2,3,4,5,6,7]
// GFX11: encoding: [0x00,0x80,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa]
v_maxmin_f32_e64_dpp v0, v1, v2, v3 div:2 dpp8:[0,1,2,3,4,5,6,7] fi:1
// GFX11: encoding: [0x00,0x00,0x5e,0xd6,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]
v_minmax_f32_e64_dpp v0, -v1, -v2, -v3 dpp8:[0,1,2,3,4,5,6,7]
// GFX11: encoding: [0x00,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa]
v_minmax_f32_e64_dpp v0, abs(v1), v2, v3 dpp8:[0,1,2,3,4,5,6,7]
// GFX11: encoding: [0x00,0x01,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa]
v_fma_f32_e64_dpp v80, v81, v82, v81 dpp8:[0,1,6,3,4,5,6,7]
// GFX11: encoding: [0x50,0x00,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa]
v_fma_f32_e64_dpp v80, v81, abs(v82), v81 dpp8:[0,1,6,3,4,5,6,7]
// GFX11: encoding: [0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa]
v_max3_f32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
v_add3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
v_add3_u32_e64_dpp v5, v1, v2, s4 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x12,0x00,0x01,0x77,0x39,0x05]
; VOP3B
; Same mnemonic with two different scalar destinations: 'null' is checked under
; the common prefix (both wave modes), whereas vcc_lo expects bytes only in the
; wave32 encoding run and a diagnostic in the wave64 diagnostic run.
v_subrev_co_u32_e64_dpp v5, null, v1, v2 dpp8:[2,2,2,2,4,4,4,4]
// GFX11: encoding: [0x05,0x7c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x92,0x44,0x92]
v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[2,2,2,2,4,4,4,4]
// W32: encoding: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x92,0x44,0x92]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
; VOP3P
; The v_fma_mix* mnemonics are written without the _e64_dpp suffix and are still
; expected to produce 12-byte encodings with a dpp8 selector; v_dot2_f32_f16 is
; written with the suffix. The cases cover clamp, fi:1, abs()/negated sources,
; and explicit op_sel/op_sel_hi and neg_lo/neg_hi lists.
v_fma_mix_f32 v0, v1, v2, v3 dpp8:[2,2,2,2,4,4,4,4]
// GFX11: encoding: [0x00,0x00,0x20,0xcc,0xe9,0x04,0x0e,0x04,0x01,0x92,0x44,0x92]
v_fma_mix_f32 v0, v1, v2, v3 clamp dpp8:[2,2,2,2,4,4,4,4] fi:1
// GFX11: encoding: [0x00,0x80,0x20,0xcc,0xea,0x04,0x0e,0x04,0x01,0x92,0x44,0x92]
v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) dpp8:[2,2,2,2,4,4,4,4]
// GFX11: encoding: [0x00,0x05,0x21,0xcc,0xe9,0x04,0x0e,0x44,0x01,0x92,0x44,0x92]
; For test purposes only. OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to all 1.
v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) op_sel:[1,0,0] op_sel_hi:[1,0,0] dpp8:[2,2,2,2,4,4,4,4]
// GFX11: encoding: [0x00,0x0d,0x21,0xcc,0xe9,0x04,0x0e,0x4c,0x01,0x92,0x44,0x92]
v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05]
; DPP
; From here on the instructions use dpp controls such as quad_perm, row_shl and
; row_mirror instead of a dpp8 list, optionally followed by row_mask, bank_mask,
; bound_ctrl and fi. In the expected e64 bytes of this group the fifth byte is
; 0xfa. When row_mask or bank_mask is omitted, the matching nibble of the last
; expected byte is 0xf (high nibble for row_mask, low nibble for bank_mask).
; VOP1->3
; One-source mnemonics written with the _e64_dpp suffix, with clamp or mul:2.
v_floor_f32_e64_dpp v5, v1 clamp row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1
// GFX11: encoding: [0x05,0x80,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x07,0x05,0x00]
v_fract_f32_e64_dpp v5, v1 mul:2 quad_perm:[1,3,1,0] row_mask:0x7
// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x1d,0x00,0x7f]
v_sat_pk_u8_i16_e64_dpp v0, v2 row_mirror bank_mask:0x2 fi:1
// GFX11: encoding: [0x00,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x02,0x40,0x05,0xf2]
v_cvt_i32_f32_e64_dpp v5, v1 clamp row_shl:0x7 row_mask:0x0 bank_mask:0x0
// GFX11: encoding: [0x05,0x80,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x07,0x01,0x00]
; VOP2->3
; Two-source mnemonics written with the _e64_dpp suffix, using row_share,
; quad_perm, row_half_mirror and row_shr controls.
v_mul_f16_e64_dpp v0, v2, v4 row_share:0xa bound_ctrl:0
// GFX11: encoding: [0x00,0x00,0x35,0xd5,0xfa,0x08,0x02,0x00,0x02,0x5a,0x09,0xff]
v_xnor_b32_e64_dpp v8, v5, v2 quad_perm:[1,0,2,3] row_mask:0x1 bank_mask:0x0
// GFX11: encoding: [0x08,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x05,0xe1,0x00,0x10]
v_mbcnt_lo_u32_b32_e64_dpp v5, v126, v2 row_half_mirror bound_ctrl:0
// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x7e,0x41,0x09,0xff]
v_mul_i32_i24_e64_dpp v208, v101, v4 clamp row_shr:0xe row_mask:0x3 bank_mask:0xa bound_ctrl:0
// GFX11: encoding: [0xd0,0x80,0x09,0xd5,0xfa,0x08,0x02,0x00,0x65,0x1e,0x09,0x3a]
; v_cndmask with 'null' as the last operand is checked in both wave modes; with
; the single SGPR s1 it expects bytes only in the wave32 encoding run and a
; diagnostic in the wave64 diagnostic run.
v_cndmask_b16_e64_dpp v0, v1, v2, null quad_perm:[2,3,0,0]
// GFX11: encoding: [0x00,0x00,0x5d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x0e,0x00,0xff]
v_cndmask_b32_e64_dpp v202, v1, v2, s1 quad_perm:[2,3,0,0] fi:1
// W32: encoding: [0xca,0x00,0x01,0xd5,0xfa,0x04,0x06,0x00,0x01,0x0e,0x04,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
; VOP2
; These mnemonics use the plain _dpp suffix (no _e64) and the expected encodings
; are 8 bytes long. Both quad_perm and dpp8 selectors are exercised, each with
; and without fi:1. The v_dot2acc_f32_f16_dpp lines expect exactly the same
; bytes as the v_dot2c_f32_f16_dpp lines that have the same operands.
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x04,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05]
v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05]
v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00]
v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x04,0x00]
v_dot2acc_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05]
v_dot2acc_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05]
; VOP2B -> VOP3B
; Scalar destination plus scalar last source, with quad_perm controls. The
; 32-bit scalar operands (vcc_lo, s5, vcc_hi) expect bytes in the wave32
; encoding run and a diagnostic in the wave64 diagnostic run; the 64-bit
; operands (vcc, s[0:1]) expect the opposite. Each diagnostic check refers to
; the instruction two lines above it.
v_subrev_co_ci_u32_e64_dpp v0, vcc_lo, v1, v2, vcc_lo quad_perm:[2,2,3,1]
// W32: encoding: [0x00,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x7a,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_add_co_ci_u32_e64_dpp v0, s5, v1, v2, vcc_hi quad_perm:[0,2,3,1] row_mask:0x0
// W32: encoding: [0x00,0x05,0x20,0xd5,0xfa,0x04,0xae,0x01,0x01,0x78,0x00,0x0f]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_add_co_ci_u32_e64_dpp v0, vcc, v1, v2, s[0:1] quad_perm:[0,2,3,1] row_mask:0x0
// W64: encoding: [0x00,0x6a,0x20,0xd5,0xfa,0x04,0x02,0x00,0x01,0x78,0x00,0x0f]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
; VOP3
; _e64_dpp forms using quad_perm, row_ror, row_xmask and row_shr controls. The
; last source is variously a VGPR, vcc_hi, the constant 1 or the SGPR s2. All
; are checked under the common prefix, i.e. in both wave modes.
v_bfe_u32_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
// GFX11: encoding: [0x00,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x00]
v_fma_f32_e64_dpp v93, abs(v94), v95, v94 quad_perm:[3,2,1,0] bank_mask:0xe
// GFX11: encoding: [0x5d,0x01,0x13,0xd6,0xfa,0xbe,0x7a,0x05,0x5e,0x1b,0x00,0xfe]
v_sub_nc_i32_e64_dpp v93, v94, v95 row_ror:7 bank_mask:0x1 bound_ctrl:0
// GFX11: encoding: [0x5d,0x00,0x25,0xd7,0xfa,0xbe,0x02,0x00,0x5e,0x27,0x09,0xf1]
v_lshl_or_b32_e64_dpp v255, v5, v0, vcc_hi row_xmask:0x6 row_mask:0x0 fi:1
// GFX11: encoding: [0xff,0x00,0x56,0xd6,0xfa,0x00,0xae,0x01,0x05,0x66,0x05,0x0f]
v_cubesc_f32_e64_dpp v5, v1, v2, 1 row_shr:4 row_mask:0xf bank_mask:0xf
// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x06,0x02,0x01,0x14,0x01,0xff]
v_cubesc_f32_e64_dpp v5, v1, v2, s2 row_shr:4 row_mask:0xf bank_mask:0xf
// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0a,0x00,0x01,0x14,0x01,0xff]
; VOP3B
; Scalar destination s4 expects bytes only in the wave32 encoding run; scalar
; destination vcc expects bytes only in the wave64 encoding run. The other wave
; mode's diagnostic run expects a diagnostic on the instruction's line.
; NOTE(review): the second instruction has a space after 'bank_mask:' and its
; expected last byte is still 0xf5 - presumably a deliberate parsing case.
v_add_co_u32_e64_dpp v5, s4, v1, v2 clamp quad_perm:[2,2,3,1] bound_ctrl:0 fi:1
// W32: encoding: [0x05,0x84,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x7a,0x0c,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_add_co_u32_e64_dpp v243, vcc, v243, v2 clamp quad_perm:[1,2,3,1] bank_mask: 0x5 fi:1
// W64: encoding: [0xf3,0xea,0x00,0xd7,0xfa,0x04,0x02,0x00,0xf3,0x79,0x04,0xf5]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
; VOP3P
; The inputs are written without the _e64_dpp suffix. Unlike the checks above,
; these check lines match the whole printed instruction as well as the bytes:
; the printed form carries the _e64_dpp suffix, spells out row_mask and
; bank_mask even when the input omitted them, shows bound_ctrl:0 as
; bound_ctrl:1, and drops all-zero neg_lo/neg_hi and op_sel lists.
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[0,0,0] quad_perm:[2,2,3,1] bound_ctrl:0 fi:1
// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x00,0x00,0x13,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x7a,0x0c,0xff]
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] bank_mask:0xe
// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x05,0x13,0xcc,0xfa,0x04,0x0e,0x64,0x01,0x1b,0x00,0xfe]
v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] row_ror:7 bank_mask:0x1 bound_ctrl:0
// GFX11: v_fma_mix_f32_e64_dpp v0, v1, v2, v3 row_ror:7 row_mask:0xf bank_mask:0x1 bound_ctrl:1 ; encoding: [0x00,0x00,0x20,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x27,0x09,0xf1]
v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp quad_perm:[0,2,3,1] row_mask:0x0
// GFX11: v_fma_mixhi_f16_e64_dpp v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x00,0xc0,0x22,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x78,0x00,0x0f]