[GlobalISel][AArch64] Add lowering for G_SMULFIX (#196757)
Add lowering for the generic G_SMULFIX opcode. This is needed to compile
`libc/src/stdfix/expk.cpp` with `-O3`.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 8bcae5d..9858f5e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -576,6 +576,7 @@
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI,
unsigned MaxLen = 0);
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI);
+ LLVM_ABI LegalizeResult lowerSmulfix(MachineInstr &MI);
};
} // End namespace llvm.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 909decf..070d7ec 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4968,6 +4968,8 @@
MI.eraseFromParent();
return Legalized;
}
+ case G_SMULFIX:
+ return lowerSmulfix(MI);
}
}
@@ -10654,6 +10656,30 @@
return Legalized;
}
+/// Lower G_SMULFIX (signed fixed-point multiply) for targets with no native
+/// support.
+///
+/// The result is the full-width signed product of the two operands shifted
+/// right (arithmetically) by the scale immediate, truncated back to the
+/// original type.  This lowering widens both operands to twice the scalar
+/// width so the multiply cannot lose the high bits.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSmulfix(MachineInstr &MI) {
+  auto [Dst, LHS, RHS] = MI.getFirst3Regs();
+  LLT Ty = MRI.getType(Dst);
+  // Operand 3 carries the fixed-point scale (number of fractional bits).
+  unsigned Scale = MI.getOperand(3).getImm();
+
+  // With a scale of zero the operation degenerates to a plain multiply.
+  if (Scale == 0) {
+    MIRBuilder.buildMul(Dst, LHS, RHS);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // Sign-extend to 2x the element size (works for both scalars and vectors),
+  // multiply, arithmetic-shift the product right by Scale, then truncate
+  // back to the destination type.
+  LLT WideTy = Ty.changeElementSize(Ty.getScalarSizeInBits() * 2);
+  auto SExtLHS = MIRBuilder.buildSExt(WideTy, LHS);
+  auto SExtRHS = MIRBuilder.buildSExt(WideTy, RHS);
+  auto Mul = MIRBuilder.buildMul(WideTy, SExtLHS, SExtRHS);
+  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Scale);
+  auto Shifted = MIRBuilder.buildAShr(WideTy, Mul, ShiftAmt);
+  MIRBuilder.buildTrunc(Dst, Shifted);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 14a3f75..4c7abbf 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -297,6 +297,8 @@
.legalFor({i64, v16i8, v8i16, v4i32})
.lower();
+ getActionDefinitionsBuilder(G_SMULFIX).lower();
+
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
.legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
.legalFor(HasCSSC, {i32, i64})
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-smulfix.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-smulfix.mir
new file mode 100644
index 0000000..2b660e5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-smulfix.mir
@@ -0,0 +1,181 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=legalizer %s -o - | FileCheck %s
+---
+name: smulfix_i32_scale_0
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: smulfix_i32_scale_0
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $w1
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i32) = G_MUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[MUL]](i32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(i32) = COPY $w0
+ %1:_(i32) = COPY $w1
+ %2:_(i32) = G_SMULFIX %0, %1, 0
+ $w0 = COPY %2(i32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: smulfix_i32
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: smulfix_i32
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i32) = COPY $w1
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(i64) = G_SEXT [[COPY]](i32)
+ ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(i64) = G_SEXT [[COPY1]](i32)
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i64) = G_MUL [[SEXT]], [[SEXT1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 15
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[MUL]], [[C]](i64)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[ASHR]](i64)
+ ; CHECK-NEXT: $w0 = COPY [[TRUNC]](i32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(i32) = COPY $w0
+ %1:_(i32) = COPY $w1
+ %2:_(i32) = G_SMULFIX %0, %1, 15
+ $w0 = COPY %2(i32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: smulfix_i64
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: smulfix_i64
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 63
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(i64) = G_ASHR [[COPY]], [[C]](i64)
+ ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(i64) = G_ASHR [[COPY1]], [[C]](i64)
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(i64) = G_MUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(i64) = G_MUL [[ASHR]], [[COPY1]]
+ ; CHECK-NEXT: [[MUL2:%[0-9]+]]:_(i64) = G_MUL [[COPY]], [[ASHR1]]
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(i64) = G_UMULH [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(i64) = G_ADD [[MUL1]], [[MUL2]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(i64) = G_ADD [[ADD]], [[UMULH]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 15
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(i64) = G_LSHR [[MUL]], [[C1]](i64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 49
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(i64) = G_SHL [[ADD1]], [[C2]](i64)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[LSHR]], [[SHL]]
+ ; CHECK-NEXT: $x0 = COPY [[OR]](i64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(i64) = COPY $x0
+ %1:_(i64) = COPY $x1
+ %2:_(i64) = G_SMULFIX %0, %1, 15
+ $x0 = COPY %2(i64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: smulfix_4xi32
+body: |
+ bb.1:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: smulfix_4xi32
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $q1
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV]](<2 x i32>)
+ ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV1]](<2 x i32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>)
+ ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV2]](<2 x i32>)
+ ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV3]](<2 x i32>)
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x i64>) = G_MUL [[SEXT]], [[SEXT2]]
+ ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(<2 x i64>) = G_MUL [[SEXT1]], [[SEXT3]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64)
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i64>) = G_ASHR [[MUL]], [[BUILD_VECTOR]](<2 x i64>)
+ ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x i64>) = G_ASHR [[MUL1]], [[BUILD_VECTOR]](<2 x i64>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i32>) = G_TRUNC [[ASHR]](<2 x i64>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i32>) = G_TRUNC [[ASHR1]](<2 x i64>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[TRUNC]](<2 x i32>), [[TRUNC1]](<2 x i32>)
+ ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<4 x i32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<4 x i32>) = COPY $q0
+ %1:_(<4 x i32>) = COPY $q1
+ %2:_(<4 x i32>) = G_SMULFIX %0, %1, 2
+ $q0 = COPY %2(<4 x i32>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: smulfix_4xi32_15
+body: |
+ bb.1:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: smulfix_4xi32_15
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $q1
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV]](<2 x i32>)
+ ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV1]](<2 x i32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>)
+ ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV2]](<2 x i32>)
+ ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV3]](<2 x i32>)
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x i64>) = G_MUL [[SEXT]], [[SEXT2]]
+ ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(<2 x i64>) = G_MUL [[SEXT1]], [[SEXT3]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 15
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64)
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i64>) = G_ASHR [[MUL]], [[BUILD_VECTOR]](<2 x i64>)
+ ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x i64>) = G_ASHR [[MUL1]], [[BUILD_VECTOR]](<2 x i64>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i32>) = G_TRUNC [[ASHR]](<2 x i64>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i32>) = G_TRUNC [[ASHR1]](<2 x i64>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[TRUNC]](<2 x i32>), [[TRUNC1]](<2 x i32>)
+ ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<4 x i32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<4 x i32>) = COPY $q0
+ %1:_(<4 x i32>) = COPY $q1
+ %2:_(<4 x i32>) = G_SMULFIX %0, %1, 15
+ $q0 = COPY %2(<4 x i32>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: smulfix_4xi32_31
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: smulfix_4xi32_31
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i32>) = COPY $q1
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x i32>), [[UV1:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY]](<4 x i32>)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV]](<2 x i32>)
+ ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV1]](<2 x i32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x i32>), [[UV3:%[0-9]+]]:_(<2 x i32>) = G_UNMERGE_VALUES [[COPY1]](<4 x i32>)
+ ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV2]](<2 x i32>)
+ ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(<2 x i64>) = G_SEXT [[UV3]](<2 x i32>)
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x i64>) = G_MUL [[SEXT]], [[SEXT2]]
+ ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(<2 x i64>) = G_MUL [[SEXT1]], [[SEXT3]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 31
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x i64>) = G_BUILD_VECTOR [[C]](i64), [[C]](i64)
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<2 x i64>) = G_ASHR [[MUL]], [[BUILD_VECTOR]](<2 x i64>)
+ ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x i64>) = G_ASHR [[MUL1]], [[BUILD_VECTOR]](<2 x i64>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x i32>) = G_TRUNC [[ASHR]](<2 x i64>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x i32>) = G_TRUNC [[ASHR1]](<2 x i64>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x i32>) = G_CONCAT_VECTORS [[TRUNC]](<2 x i32>), [[TRUNC1]](<2 x i32>)
+ ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<4 x i32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<4 x i32>) = COPY $q0
+ %1:_(<4 x i32>) = COPY $q1
+ %2:_(<4 x i32>) = G_SMULFIX %0, %1, 31
+ $q0 = COPY %2(<4 x i32>)
+ RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 2e0b781..70dbeb7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -492,8 +492,8 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_SMULFIX (opcode {{[0-9]+}}): 1 type index, 1 imm index
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_UMULFIX (opcode {{[0-9]+}}): 1 type index, 1 imm index
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
diff --git a/llvm/test/CodeGen/AArch64/smul_fix.ll b/llvm/test/CodeGen/AArch64/smul_fix.ll
index dacce72..f99d20a 100644
--- a/llvm/test/CodeGen/AArch64/smul_fix.ll
+++ b/llvm/test/CodeGen/AArch64/smul_fix.ll
@@ -1,37 +1,65 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define i32 @func(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: func:
-; CHECK: // %bb.0:
-; CHECK-NEXT: smull x8, w0, w1
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: extr w0, w9, w8, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: func:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: smull x8, w0, w1
+; CHECK-SD-NEXT: lsr x9, x8, #32
+; CHECK-SD-NEXT: extr w0, w9, w8, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: func:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: smull x8, w0, w1
+; CHECK-GI-NEXT: asr x0, x8, #2
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT: ret
%tmp = call i32 @llvm.smul.fix.i32(i32 %x, i32 %y, i32 2)
ret i32 %tmp
}
define i64 @func2(i64 %x, i64 %y) {
-; CHECK-LABEL: func2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mul x8, x0, x1
-; CHECK-NEXT: smulh x9, x0, x1
-; CHECK-NEXT: extr x0, x9, x8, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: func2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mul x8, x0, x1
+; CHECK-SD-NEXT: smulh x9, x0, x1
+; CHECK-SD-NEXT: extr x0, x9, x8, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: func2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: asr x8, x1, #63
+; CHECK-GI-NEXT: asr x9, x0, #63
+; CHECK-GI-NEXT: umulh x10, x0, x1
+; CHECK-GI-NEXT: mul x8, x0, x8
+; CHECK-GI-NEXT: madd x8, x9, x1, x8
+; CHECK-GI-NEXT: mul x9, x0, x1
+; CHECK-GI-NEXT: add x8, x8, x10
+; CHECK-GI-NEXT: extr x0, x8, x9, #2
+; CHECK-GI-NEXT: ret
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2)
ret i64 %tmp
}
define i4 @func3(i4 %x, i4 %y) nounwind {
-; CHECK-LABEL: func3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sbfx w8, w1, #0, #4
-; CHECK-NEXT: sbfx w9, w0, #0, #4
-; CHECK-NEXT: smull x8, w9, w8
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: extr w0, w9, w8, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: func3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sbfx w8, w1, #0, #4
+; CHECK-SD-NEXT: sbfx w9, w0, #0, #4
+; CHECK-SD-NEXT: smull x8, w9, w8
+; CHECK-SD-NEXT: lsr x9, x8, #32
+; CHECK-SD-NEXT: extr w0, w9, w8, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: func3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sbfx w8, w0, #0, #4
+; CHECK-GI-NEXT: sbfx w9, w1, #0, #4
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: sbfx w0, w8, #2, #6
+; CHECK-GI-NEXT: ret
%tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 2)
ret i4 %tmp
}
@@ -56,40 +84,69 @@
}
define i4 @func6(i4 %x, i4 %y) nounwind {
-; CHECK-LABEL: func6:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sbfx w8, w1, #0, #4
-; CHECK-NEXT: sbfx w9, w0, #0, #4
-; CHECK-NEXT: mul w0, w9, w8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: func6:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sbfx w8, w1, #0, #4
+; CHECK-SD-NEXT: sbfx w9, w0, #0, #4
+; CHECK-SD-NEXT: mul w0, w9, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: func6:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mul w0, w0, w1
+; CHECK-GI-NEXT: ret
%tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 0)
ret i4 %tmp
}
define i64 @func7(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: func7:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mul x8, x0, x1
-; CHECK-NEXT: smulh x9, x0, x1
-; CHECK-NEXT: extr x0, x9, x8, #32
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: func7:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mul x8, x0, x1
+; CHECK-SD-NEXT: smulh x9, x0, x1
+; CHECK-SD-NEXT: extr x0, x9, x8, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: func7:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: asr x8, x1, #63
+; CHECK-GI-NEXT: asr x9, x0, #63
+; CHECK-GI-NEXT: umulh x10, x0, x1
+; CHECK-GI-NEXT: mul x8, x0, x8
+; CHECK-GI-NEXT: madd x8, x9, x1, x8
+; CHECK-GI-NEXT: mul x9, x0, x1
+; CHECK-GI-NEXT: add x8, x8, x10
+; CHECK-GI-NEXT: extr x0, x8, x9, #32
+; CHECK-GI-NEXT: ret
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 32)
ret i64 %tmp
}
define i64 @func8(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: func8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mul x8, x0, x1
-; CHECK-NEXT: smulh x9, x0, x1
-; CHECK-NEXT: extr x0, x9, x8, #63
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: func8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mul x8, x0, x1
+; CHECK-SD-NEXT: smulh x9, x0, x1
+; CHECK-SD-NEXT: extr x0, x9, x8, #63
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: func8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: asr x8, x1, #63
+; CHECK-GI-NEXT: asr x9, x0, #63
+; CHECK-GI-NEXT: umulh x10, x0, x1
+; CHECK-GI-NEXT: mul x8, x0, x8
+; CHECK-GI-NEXT: madd x8, x9, x1, x8
+; CHECK-GI-NEXT: mul x9, x0, x1
+; CHECK-GI-NEXT: add x8, x8, x10
+; CHECK-GI-NEXT: extr x0, x8, x9, #63
+; CHECK-GI-NEXT: ret
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 63)
ret i64 %tmp
}
-define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
-; CHECK-LABEL: vec:
+define <2 x i32> @smulfix_2xi32_0(<2 x i32> %x, <2 x i32> %y) nounwind {
+; CHECK-LABEL: smulfix_2xi32_0:
; CHECK: // %bb.0:
; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
@@ -97,8 +154,8 @@
ret <2 x i32> %tmp
}
-define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec2:
+define <4 x i32> @smulfix_4xi32_0(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: smulfix_4xi32_0:
; CHECK: // %bb.0:
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -106,47 +163,127 @@
ret <4 x i32> %tmp
}
-define <4 x i64> @vec3(<4 x i64> %x, <4 x i64> %y) nounwind {
-; CHECK-LABEL: vec3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, v2.d[1]
-; CHECK-NEXT: mov x9, v0.d[1]
-; CHECK-NEXT: fmov x10, d2
-; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: mov x14, v3.d[1]
-; CHECK-NEXT: mov x15, v1.d[1]
-; CHECK-NEXT: mul x12, x11, x10
-; CHECK-NEXT: mul x13, x9, x8
-; CHECK-NEXT: smulh x8, x9, x8
-; CHECK-NEXT: smulh x9, x11, x10
-; CHECK-NEXT: fmov x10, d3
-; CHECK-NEXT: fmov x11, d1
-; CHECK-NEXT: mul x16, x11, x10
-; CHECK-NEXT: extr x8, x8, x13, #32
-; CHECK-NEXT: smulh x10, x11, x10
-; CHECK-NEXT: extr x9, x9, x12, #32
-; CHECK-NEXT: mul x11, x15, x14
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: smulh x14, x15, x14
-; CHECK-NEXT: extr x10, x10, x16, #32
-; CHECK-NEXT: mov v0.d[1], x8
-; CHECK-NEXT: fmov d1, x10
-; CHECK-NEXT: extr x11, x14, x11, #32
-; CHECK-NEXT: mov v1.d[1], x11
-; CHECK-NEXT: ret
+define <4 x i32> @smulfix_4xi32(<4 x i32> %1, <4 x i32> %2) {
+; CHECK-SD-LABEL: smulfix_4xi32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: smull2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT: smull v3.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT: mul v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: uzp2 v2.4s, v3.4s, v2.4s
+; CHECK-SD-NEXT: shl v0.4s, v2.4s, #17
+; CHECK-SD-NEXT: usra v0.4s, v1.4s, #15
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: smulfix_4xi32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: smull v2.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: smull2 v1.2d, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.2s, v2.2d, #15
+; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #15
+; CHECK-GI-NEXT: ret
+ %m = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %1, <4 x i32> %2, i32 15)
+ ret <4 x i32> %m
+}
+
+define <4 x i64> @smulfix_4xi64(<4 x i64> %x, <4 x i64> %y) nounwind {
+; CHECK-SD-LABEL: smulfix_4xi64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov x8, v2.d[1]
+; CHECK-SD-NEXT: mov x9, v0.d[1]
+; CHECK-SD-NEXT: fmov x10, d2
+; CHECK-SD-NEXT: fmov x11, d0
+; CHECK-SD-NEXT: mov x14, v3.d[1]
+; CHECK-SD-NEXT: mov x15, v1.d[1]
+; CHECK-SD-NEXT: mul x12, x11, x10
+; CHECK-SD-NEXT: mul x13, x9, x8
+; CHECK-SD-NEXT: smulh x8, x9, x8
+; CHECK-SD-NEXT: smulh x9, x11, x10
+; CHECK-SD-NEXT: fmov x10, d3
+; CHECK-SD-NEXT: fmov x11, d1
+; CHECK-SD-NEXT: mul x16, x11, x10
+; CHECK-SD-NEXT: extr x8, x8, x13, #32
+; CHECK-SD-NEXT: smulh x10, x11, x10
+; CHECK-SD-NEXT: extr x9, x9, x12, #32
+; CHECK-SD-NEXT: mul x11, x15, x14
+; CHECK-SD-NEXT: fmov d0, x9
+; CHECK-SD-NEXT: smulh x14, x15, x14
+; CHECK-SD-NEXT: extr x10, x10, x16, #32
+; CHECK-SD-NEXT: mov v0.d[1], x8
+; CHECK-SD-NEXT: fmov d1, x10
+; CHECK-SD-NEXT: extr x11, x14, x11, #32
+; CHECK-SD-NEXT: mov v1.d[1], x11
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: smulfix_4xi64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: mov d2, v2.d[1]
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: asr x10, x9, #63
+; CHECK-GI-NEXT: asr x12, x8, #63
+; CHECK-GI-NEXT: mul x11, x8, x9
+; CHECK-GI-NEXT: mul x10, x8, x10
+; CHECK-GI-NEXT: umulh x8, x8, x9
+; CHECK-GI-NEXT: madd x9, x12, x9, x10
+; CHECK-GI-NEXT: fmov x12, d2
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: mov d0, v3.d[1]
+; CHECK-GI-NEXT: asr x13, x12, #63
+; CHECK-GI-NEXT: asr x15, x10, #63
+; CHECK-GI-NEXT: mul x14, x10, x12
+; CHECK-GI-NEXT: mul x13, x10, x13
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: add x8, x9, x8
+; CHECK-GI-NEXT: extr x8, x8, x11, #32
+; CHECK-GI-NEXT: umulh x10, x10, x12
+; CHECK-GI-NEXT: asr x1, x0, #63
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: madd x12, x15, x12, x13
+; CHECK-GI-NEXT: fmov x15, d3
+; CHECK-GI-NEXT: fmov x13, d1
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: asr x16, x15, #63
+; CHECK-GI-NEXT: asr x18, x13, #63
+; CHECK-GI-NEXT: mul x17, x13, x15
+; CHECK-GI-NEXT: mul x16, x13, x16
+; CHECK-GI-NEXT: add x9, x12, x10
+; CHECK-GI-NEXT: extr x9, x9, x14, #32
+; CHECK-GI-NEXT: umulh x13, x13, x15
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: madd x15, x18, x15, x16
+; CHECK-GI-NEXT: fmov x16, d1
+; CHECK-GI-NEXT: mul x18, x16, x1
+; CHECK-GI-NEXT: asr x1, x16, #63
+; CHECK-GI-NEXT: umulh x2, x16, x0
+; CHECK-GI-NEXT: add x10, x15, x13
+; CHECK-GI-NEXT: extr x10, x10, x17, #32
+; CHECK-GI-NEXT: madd x18, x1, x0, x18
+; CHECK-GI-NEXT: fmov d1, x10
+; CHECK-GI-NEXT: mul x16, x16, x0
+; CHECK-GI-NEXT: add x12, x18, x2
+; CHECK-GI-NEXT: extr x11, x12, x16, #32
+; CHECK-GI-NEXT: mov v1.d[1], x11
+; CHECK-GI-NEXT: ret
%tmp = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> %x, <4 x i64> %y, i32 32)
ret <4 x i64> %tmp
}
define <4 x i16> @widemul(<4 x i16> %x, <4 x i16> %y) nounwind {
-; CHECK-LABEL: widemul:
-; CHECK: // %bb.0:
-; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT: shrn v1.4h, v0.4s, #16
-; CHECK-NEXT: xtn v2.4h, v0.4s
-; CHECK-NEXT: shl v0.4h, v1.4h, #14
-; CHECK-NEXT: usra v0.4h, v2.4h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: widemul:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v0.4s, #16
+; CHECK-SD-NEXT: xtn v2.4h, v0.4s
+; CHECK-SD-NEXT: shl v0.4h, v1.4h, #14
+; CHECK-SD-NEXT: usra v0.4h, v2.4h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: widemul:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #2
+; CHECK-GI-NEXT: ret
%tmp = call <4 x i16> @llvm.smul.fix.v4i16(<4 x i16> %x, <4 x i16> %y, i32 2)
ret <4 x i16> %tmp
}