Allow code motion (and thus folding) for atomic (but unordered) memory operands

Building on the work done in D57601, now that we can distinguish between atomic and volatile memory accesses, go ahead and allow code motion of unordered atomics. As seen in the test diffs, this allows much better folding of memory operations into their using instructions (mostly done by the PeepholeOpt pass).
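
For example, for an unordered atomic float load (taken from the
atomic-non-integer.ll diff below):

  %v = load atomic float, float* %fptr unordered, align 4

we previously emitted a load into an integer register followed by a
register-to-register move:

  movl (%rdi), %eax
  movd %eax, %xmm0

and now fold the load directly into its user:

  movd (%rdi), %xmm0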

Note: I have not audited all callers of hasOrderedMemoryRef, since one of them (isSafeToMove) is very widely used. Instead, I'm relying on the documented semantics of each method to judge correctness.
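
For context, a paraphrase of the MachineMemOperand predicate involved (the
authoritative definition lives in include/llvm/CodeGen/MachineMemOperand.h;
this sketch just restates its documented semantics):

  // An access is unordered if it is non-volatile and either not atomic at
  // all or atomic with "unordered" ordering. Volatile accesses and ordered
  // atomics still count as ordered memory references and block code motion.
  bool isUnordered() const {
    return (getOrdering() == AtomicOrdering::NotAtomic ||
            getOrdering() == AtomicOrdering::Unordered) &&
           !isVolatile();
  }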

Differential Revision: https://reviews.llvm.org/D59345

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356170 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 95f5eb9..17bd0f3 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1291,10 +1291,8 @@
     return true;
 
   // Check if any of our memory operands are ordered.
-  // TODO: This should probably be be isUnordered (see D57601), but the callers
-  // need audited and test cases written to be sure.
   return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) {
-    return MMO->isVolatile() || MMO->isAtomic();
+    return !MMO->isUnordered();
   });
 }
 
diff --git a/test/CodeGen/X86/atomic-non-integer.ll b/test/CodeGen/X86/atomic-non-integer.ll
index 36a55ef..bdeeab3 100644
--- a/test/CodeGen/X86/atomic-non-integer.ll
+++ b/test/CodeGen/X86/atomic-non-integer.ll
@@ -62,8 +62,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movzwl %ax, %edi
+; CHECK-NEXT:    movzwl (%rdi), %edi
 ; CHECK-NEXT:    callq __gnu_h2f_ieee
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
@@ -75,8 +74,7 @@
 define float @load_float(float* %fptr) {
 ; CHECK-LABEL: load_float:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movd (%rdi), %xmm0
 ; CHECK-NEXT:    retq
   %v = load atomic float, float* %fptr unordered, align 4
   ret float %v
@@ -85,8 +83,7 @@
 define double @load_double(double* %fptr) {
 ; CHECK-LABEL: load_double:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    movq (%rdi), %xmm0
 ; CHECK-NEXT:    retq
   %v = load atomic double, double* %fptr unordered, align 8
   ret double %v
diff --git a/test/CodeGen/X86/atomic-unordered.ll b/test/CodeGen/X86/atomic-unordered.ll
index 8993f29..e56ebe8 100644
--- a/test/CodeGen/X86/atomic-unordered.ll
+++ b/test/CodeGen/X86/atomic-unordered.ll
@@ -437,7 +437,6 @@
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_add2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_add2:
 ; CHECK-O0:       # %bb.0:
@@ -447,15 +446,14 @@
 ;
 ; CHECK-O3-LABEL: load_fold_add2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    addq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    addq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = add i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_add3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_add3:
 ; CHECK-O0:       # %bb.0:
@@ -466,9 +464,8 @@
 ;
 ; CHECK-O3-LABEL: load_fold_add3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    addq %rcx, %rax
+; CHECK-O3-NEXT:    addq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -495,7 +492,6 @@
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_sub2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_sub2:
 ; CHECK-O0:       # %bb.0:
@@ -514,7 +510,6 @@
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_sub3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_sub3:
 ; CHECK-O0:       # %bb.0:
@@ -526,8 +521,7 @@
 ; CHECK-O3-LABEL: load_fold_sub3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
-; CHECK-O3-NEXT:    subq %rcx, %rax
+; CHECK-O3-NEXT:    subq (%rsi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -553,7 +547,6 @@
   ret i64 %ret
 }
 
-; Legal, O0 is better than O3 codegen (TODO)
 define i64 @load_fold_mul2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_mul2:
 ; CHECK-O0:       # %bb.0:
@@ -563,15 +556,14 @@
 ;
 ; CHECK-O3-LABEL: load_fold_mul2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    imulq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    imulq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = mul i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_mul3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_mul3:
 ; CHECK-O0:       # %bb.0:
@@ -582,9 +574,8 @@
 ;
 ; CHECK-O3-LABEL: load_fold_mul3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    imulq %rcx, %rax
+; CHECK-O3-NEXT:    imulq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -639,7 +630,6 @@
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_sdiv3:
 ; CHECK-O0:       # %bb.0:
@@ -651,9 +641,8 @@
 ; CHECK-O3-LABEL: load_fold_sdiv3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
 ; CHECK-O3-NEXT:    cqto
-; CHECK-O3-NEXT:    idivq %rcx
+; CHECK-O3-NEXT:    idivq (%rsi)
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -685,7 +674,6 @@
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_udiv2:
 ; CHECK-O0:       # %bb.0:
@@ -706,7 +694,6 @@
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_udiv3:
 ; CHECK-O0:       # %bb.0:
@@ -719,9 +706,8 @@
 ; CHECK-O3-LABEL: load_fold_udiv3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
 ; CHECK-O3-NEXT:    xorl %edx, %edx
-; CHECK-O3-NEXT:    divq %rcx
+; CHECK-O3-NEXT:    divq (%rsi)
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -783,7 +769,6 @@
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_srem3:
 ; CHECK-O0:       # %bb.0:
@@ -796,9 +781,8 @@
 ; CHECK-O3-LABEL: load_fold_srem3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
 ; CHECK-O3-NEXT:    cqto
-; CHECK-O3-NEXT:    idivq %rcx
+; CHECK-O3-NEXT:    idivq (%rsi)
 ; CHECK-O3-NEXT:    movq %rdx, %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -859,7 +843,6 @@
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_urem3:
 ; CHECK-O0:       # %bb.0:
@@ -873,9 +856,8 @@
 ; CHECK-O3-LABEL: load_fold_urem3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
 ; CHECK-O3-NEXT:    xorl %edx, %edx
-; CHECK-O3-NEXT:    divq %rcx
+; CHECK-O3-NEXT:    divq (%rsi)
 ; CHECK-O3-NEXT:    movq %rdx, %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -1101,7 +1083,6 @@
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_and2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_and2:
 ; CHECK-O0:       # %bb.0:
@@ -1111,15 +1092,14 @@
 ;
 ; CHECK-O3-LABEL: load_fold_and2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    andq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    andq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = and i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_and3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_and3:
 ; CHECK-O0:       # %bb.0:
@@ -1130,9 +1110,8 @@
 ;
 ; CHECK-O3-LABEL: load_fold_and3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    andq %rcx, %rax
+; CHECK-O3-NEXT:    andq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1159,7 +1138,6 @@
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_or2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_or2:
 ; CHECK-O0:       # %bb.0:
@@ -1169,15 +1147,14 @@
 ;
 ; CHECK-O3-LABEL: load_fold_or2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    orq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    orq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = or i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_or3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_or3:
 ; CHECK-O0:       # %bb.0:
@@ -1188,9 +1165,8 @@
 ;
 ; CHECK-O3-LABEL: load_fold_or3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    orq %rcx, %rax
+; CHECK-O3-NEXT:    orq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1217,7 +1193,6 @@
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_xor2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_xor2:
 ; CHECK-O0:       # %bb.0:
@@ -1227,15 +1202,14 @@
 ;
 ; CHECK-O3-LABEL: load_fold_xor2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    xorq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    xorq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = xor i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_xor3:
 ; CHECK-O0:       # %bb.0:
@@ -1246,9 +1220,8 @@
 ;
 ; CHECK-O3-LABEL: load_fold_xor3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    xorq %rcx, %rax
+; CHECK-O3-NEXT:    xorq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1256,7 +1229,6 @@
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i1 @load_fold_icmp1(i64* %p) {
 ; CHECK-O0-LABEL: load_fold_icmp1:
 ; CHECK-O0:       # %bb.0:
@@ -1268,8 +1240,7 @@
 ;
 ; CHECK-O3-LABEL: load_fold_icmp1:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    cmpq $15, %rax
+; CHECK-O3-NEXT:    cmpq $15, (%rdi)
 ; CHECK-O3-NEXT:    sete %al
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
@@ -1277,7 +1248,6 @@
   ret i1 %ret
 }
 
-; Legal to fold (TODO)
 define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_icmp2:
 ; CHECK-O0:       # %bb.0:
@@ -1289,8 +1259,7 @@
 ;
 ; CHECK-O3-LABEL: load_fold_icmp2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    cmpq %rsi, %rax
+; CHECK-O3-NEXT:    cmpq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    sete %al
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
@@ -1298,7 +1267,6 @@
   ret i1 %ret
 }
 
-; Legal to fold (TODO)
 define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_icmp3:
 ; CHECK-O0:       # %bb.0:
@@ -1311,9 +1279,8 @@
 ;
 ; CHECK-O3-LABEL: load_fold_icmp3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
-; CHECK-O3-NEXT:    cmpq %rcx, %rax
+; CHECK-O3-NEXT:    movq (%rsi), %rax
+; CHECK-O3-NEXT:    cmpq %rax, (%rdi)
 ; CHECK-O3-NEXT:    sete %al
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -1441,9 +1408,8 @@
 ;
 ; CHECK-O3-LABEL: rmw_fold_mul2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    imulq %rsi, %rax
-; CHECK-O3-NEXT:    movq %rax, (%rdi)
+; CHECK-O3-NEXT:    imulq (%rdi), %rsi
+; CHECK-O3-NEXT:    movq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = mul i64 %prev, %v
@@ -2070,9 +2036,8 @@
 ;
 ; CHECK-O3-LABEL: load_forwarding:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    orq %rcx, %rax
+; CHECK-O3-NEXT:    orq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %v2 = load atomic i64, i64* %p unordered, align 8