[LLVM][CodeGen][AArch64] Fix global-isel for LD1R. (#164418)
LD1Rv8b only supports a base register, but the DAG was matched using
am_indexed8, so any offset found by that pattern was silently dropped.
I've also fixed a couple of immediate operand type inconsistencies.
They don't manifest as bugs because their incorrect scaling is overridden
by the complex pattern and the MachineInstr, both of which are correct,
so there's nothing to test.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 31fcd63..5d9215d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -136,8 +136,8 @@
(ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend))))),
(LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_nonext_32>
- (am_indexed32 GPR64sp:$Rn, uimm12s8:$offset))))),
- (LDRSui GPR64sp:$Rn, uimm12s8:$offset)>;
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_nonext_32>
(am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
(LDURSi GPR64sp:$Rn, simm9:$offset)>;
@@ -236,11 +236,11 @@
def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
(STLRX GPR64:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend),
+ ro_Wextend64:$extend),
GPR64:$val),
(STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend),
+ ro_Xextend64:$extend),
GPR64:$val),
(STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
def : Pat<(relaxed_store<atomic_store_64>
@@ -276,8 +276,8 @@
(i64 (bitconvert (f64 FPR64Op:$val)))),
(STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
def : Pat<(relaxed_store<atomic_store_64>
- (am_indexed64 GPR64sp:$Rn, uimm12s4:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
- (STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+ (STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(relaxed_store<atomic_store_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
(STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index fe84193..30b7b03 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -507,7 +507,7 @@
defm : VecROStoreLane64_0Pat<ro32, store, v2i32, i32, ssub, STRSroW, STRSroX>;
}
-def : Pat<(v8i8 (AArch64dup (i8 (load (am_indexed8 GPR64sp:$Rn))))),
+def : Pat<(v8i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
(LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
(LD1Rv16b GPR64sp:$Rn)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 0b22fa4..c2b2c1e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1654,24 +1654,14 @@
}
define <8 x i8> @dup_ld1_from_stack(ptr %__ret) {
-; CHECK-SD-LABEL: dup_ld1_from_stack:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #16
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT: add x8, sp, #15
-; CHECK-SD-NEXT: ld1r.8b { v0 }, [x8]
-; CHECK-SD-NEXT: add sp, sp, #16
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: dup_ld1_from_stack:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w29, -16
-; CHECK-GI-NEXT: add x8, sp, #15
-; CHECK-GI-NEXT: ld1r.8b { v0 }, [x8]
-; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: dup_ld1_from_stack:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: add x8, sp, #15
+; CHECK-NEXT: ld1r.8b { v0 }, [x8]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
entry:
%item = alloca i8, align 1
%0 = load i8, ptr %item, align 1