[RISCV] Add ISel patterns for Xqciac QC.MULIADD instruction (#147661)

Add basic isel patterns for the multiply-accumulate QC.MULIADD
instruction.

While most cases work with just the TD file pattern, there are a few
cases that need to be handled in ISelLowering, depending on the immediate
we are multiplying by:

- imm + 1, imm - 1, 1 - imm, or -1 - imm is a power of 2 --> these become
slli and add/sub
- immediate is 2^n - 2^m --> this becomes (add/sub (shl X, C1), (shl X,
C2))
- imm - 2, imm - 4, or imm - 8 is a power of 2 --> these use shxadd when
zba is enabled

With this patch, mul is not decomposed under the above conditions when
Xqciac is present and the immediate fits in simm12. There could be cases
where this is not beneficial, which I plan to address in follow-up
patches.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7dbc04c..456f3ae 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15993,6 +15993,10 @@
     return SDValue();
   uint64_t MulAmt = CNode->getZExtValue();
 
+  // Don't do this if the Xqciac extension is enabled and MulAmt fits in simm12.
+  if (Subtarget.hasVendorXqciac() && isInt<12>(MulAmt))
+    return SDValue();
+
   const bool HasShlAdd = Subtarget.hasStdExtZba() ||
                          Subtarget.hasVendorXTHeadBa() ||
                          Subtarget.hasVendorXAndesPerf();
@@ -23752,6 +23756,10 @@
   auto *ConstNode = cast<ConstantSDNode>(C);
   const APInt &Imm = ConstNode->getAPIntValue();
 
+  // Don't do this if the Xqciac extension is enabled and Imm fits in simm12.
+  if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
+    return false;
+
   // Break the MUL to a SLLI and an ADD/SUB.
   if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
       (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 6a1b896..9e3eb1c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1356,6 +1356,11 @@
           (OpNode GPRNoX0:$lhs, InTyImm:$Constant,
            (IntCCtoRISCVCC $cc), GPRNoX0:$truev, GPRNoX0:$falsev)>;
 
+let Predicates = [HasVendorXqciac, IsRV32] in {
+def : Pat<(XLenVT (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12:$imm12))),
+          (QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12:$imm12)>;
+} // Predicates = [HasVendorXqciac, IsRV32]
+
 /// Simple arithmetic operations
 
 let Predicates = [HasVendorXqcilia, IsRV32] in {
diff --git a/llvm/test/CodeGen/RISCV/xqciac.ll b/llvm/test/CodeGen/RISCV/xqciac.ll
new file mode 100644
index 0000000..4cee091
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xqciac.ll
@@ -0,0 +1,271 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IM
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-xqciac -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IMXQCIAC
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-xqciac,+zba -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IZBAMXQCIAC
+
+define dso_local i32 @mul(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: mul:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a0, a1, 5
+; RV32IM-NEXT:    add a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: mul:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    li a0, 33
+; RV32IMXQCIAC-NEXT:    mul a0, a1, a0
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: mul:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    li a0, 33
+; RV32IZBAMXQCIAC-NEXT:    mul a0, a1, a0
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 33
+  ret i32 %mul
+}
+
+define dso_local i32 @muliadd(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: muliadd:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a2, 165
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: muliadd:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, 165
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: muliadd:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, 165
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 165
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @muliadd2(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: muliadd2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a2, 1111
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: muliadd2:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, 1111
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: muliadd2:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, 1111
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 1111
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @muliadd_neg(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: muliadd_neg:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a2, -165
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: muliadd_neg:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, -165
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: muliadd_neg:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, -165
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, -165
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @muliadd_neg2(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: muliadd_neg2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a2, -2045
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: muliadd_neg2:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, -2045
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: muliadd_neg2:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, -2045
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, -2045
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @pow2immplus1(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: pow2immplus1:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a2, a1, 5
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    add a0, a2, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: pow2immplus1:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, 33
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: pow2immplus1:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, 33
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 33
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @pow2immminus2(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: pow2immminus2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a2, a1, 1
+; RV32IM-NEXT:    slli a1, a1, 7
+; RV32IM-NEXT:    sub a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: pow2immminus2:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, 126
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: pow2immminus2:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, 126
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 126
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @pow2minuspow2(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: pow2minuspow2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a2, a1, 7
+; RV32IM-NEXT:    slli a1, a1, 9
+; RV32IM-NEXT:    sub a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: pow2minuspow2:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, 384
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: pow2minuspow2:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, 384
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 384
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @gtsimm12(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: gtsimm12:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    lui a2, 1
+; RV32IM-NEXT:    addi a2, a2, 477
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: gtsimm12:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    lui a2, 1
+; RV32IMXQCIAC-NEXT:    addi a2, a2, 477
+; RV32IMXQCIAC-NEXT:    mul a1, a1, a2
+; RV32IMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: gtsimm12:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    lui a2, 1
+; RV32IZBAMXQCIAC-NEXT:    addi a2, a2, 477
+; RV32IZBAMXQCIAC-NEXT:    mul a1, a1, a2
+; RV32IZBAMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 4573
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+; NOTE: This will become qc.shladd once support is added
+define dso_local i32 @pow2(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: pow2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a1, a1, 5
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: pow2:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    slli a1, a1, 5
+; RV32IMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: pow2:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    slli a1, a1, 5
+; RV32IZBAMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 32
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @shxadd(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: shxadd:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: shxadd:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    slli a1, a1, 1
+; RV32IMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: shxadd:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    sh1add a0, a1, a0
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 2
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}