[LoongArch][DAG] Custom lowering for vector SETCC operations (#177904)
### Summary
This PR resolves https://github.com/llvm/llvm-project/issues/177863
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 6cb0983..bbf199f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -411,7 +411,7 @@
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
- setOperationAction(ISD::SETCC, VT, Legal);
+ setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
@@ -613,6 +613,8 @@
return lowerVECREDUCE(Op, DAG);
case ISD::ConstantFP:
return lowerConstantFP(Op, DAG);
+ case ISD::SETCC:
+ return lowerSETCC(Op, DAG);
}
return SDValue();
}
@@ -725,6 +727,35 @@
return SDValue();
}
+// Rewrite a SETCC whose result type differs from getSetCCResultType() of its
+// operands, since isel cannot select result/operand width mismatches.
+SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT ResultVT = Op.getValueType(); // type the incoming node produces
+ EVT OperandVT = Op.getOperand(0).getValueType();
+
+ EVT SetCCResultVT = // compare type reported for OperandVT
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
+
+ if (ResultVT == SetCCResultVT)
+ return Op; // types already agree; hand the node back unchanged
+
+ assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
+ "SETCC operands must have the same type!");
+
+ SDValue SetCCNode = // same three operands, result in SetCCResultVT
+ DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+
+ if (ResultVT.bitsGT(SetCCResultVT)) // wider result: sign-extend
+ SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
+ else if (ResultVT.bitsLT(SetCCResultVT)) // narrower result: truncate
+ SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
+
+ return SetCCNode;
+}
+
// Lower vecreduce_add using vhaddw instructions.
// For Example:
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 74d2b7d..126ea05 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -239,6 +239,7 @@
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
diff --git a/llvm/test/CodeGen/LoongArch/pr177863.ll b/llvm/test/CodeGen/LoongArch/pr177863.ll
new file mode 100644
index 0000000..8edbd33
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/pr177863.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+
+; RUN: llc --mtriple=loongarch32 -mattr=+lasx --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 -mattr=+lasx --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64
+
+define <4 x i1> @test(<4 x i64> %shuffle2, <4 x i64> %shuffle4) {
+; LA32-LABEL: test:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvseq.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 4
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 6
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: vrepli.b $vr0, -1
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvseq.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1
+; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 2
+; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3
+; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA64-NEXT: vrepli.b $vr0, -1
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: ret
+entry:
+ %conv5 = trunc nuw <4 x i64> %shuffle4 to <4 x i32>
+ %conv3 = trunc nuw <4 x i64> %shuffle2 to <4 x i32>
+ %cmp = icmp ne <4 x i32> %conv5, %conv3
+ ret <4 x i1> %cmp
+}