[TwoAddressInstructionPass] After commuting an instruction and before trying to look for more commutable operands, resample the number of operands.

The new instruction might have fewer operands than the original instruction. If we don't resample, the next loop iteration might read an operand that doesn't exist.

X86 can commute blends to movss/movsd which reduces from 4 operands to 3. This happened in the test case that caused r354363 & company to be reverted. A reduced version of that has been committed here.

Really this whole checking for more commutable operands is a little fragile. It assumes that the new instruction's operands are in the same order and positions as the original, except for the pair that was swapped. I don't know of anything that breaks this assumption today, but I've left a FIXME. Fixing this will likely require an interface change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354738 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 737d01e..43d8766 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1244,8 +1244,13 @@
         ++NumAggrCommuted;
         // There might be more than two commutable operands, update BaseOp and
         // continue scanning.
+        // FIXME: This assumes that the new instruction's operands are in the
+        // same positions and were simply swapped.
         BaseOpReg = OtherOpReg;
         BaseOpKilled = OtherOpKilled;
+        // Resamples OpsNum in case the number of operands was reduced. This
+        // happens with X86.
+        OpsNum = MI->getDesc().getNumOperands();
         continue;
       }
       // If this was a commute based on kill, we won't do better continuing.
diff --git a/test/CodeGen/X86/commute-blend-sse41.ll b/test/CodeGen/X86/commute-blend-sse41.ll
index fa7cbb4..6f5b73b 100644
--- a/test/CodeGen/X86/commute-blend-sse41.ll
+++ b/test/CodeGen/X86/commute-blend-sse41.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+sse4.1,-slow-unaligned-mem-16 | FileCheck %s
 
 define <8 x i16> @commute_fold_pblendw(<8 x i16> %a, <8 x i16>* %b) {
 ; CHECK-LABEL: commute_fold_pblendw:
@@ -45,3 +45,29 @@
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
   ret <4 x i32> %3
 }
+
+; Test case for a crash that occurred due to blendi being commuted to
+; movsd during two address instruction pass. The change in number of operands
+; caused a bad call to getOperand. This caused the revert in r354713.
+%struct.spam = type { i64, i64 }
+
+define void @baz(<2 x i64>* %arg, %struct.spam* %arg1) optsize {
+; CHECK-LABEL: baz:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    movapd (%rdi), %xmm0
+; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [3,3]
+; CHECK-NEXT:    andpd %xmm0, %xmm1
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; CHECK-NEXT:    movupd %xmm1, (%rsi)
+; CHECK-NEXT:    retq
+bb:
+  %tmp = load <2 x i64>, <2 x i64>* %arg, align 16
+  %tmp2 = and <2 x i64> %tmp, <i64 3, i64 3>
+  %tmp3 = getelementptr inbounds %struct.spam, %struct.spam* %arg1, i64 0, i32 0
+  %tmp4 = extractelement <2 x i64> %tmp, i32 0
+  store i64 %tmp4, i64* %tmp3, align 8
+  %tmp5 = getelementptr inbounds %struct.spam, %struct.spam* %arg1, i64 0, i32 1
+  %tmp6 = extractelement <2 x i64> %tmp2, i32 1
+  store i64 %tmp6, i64* %tmp5, align 8
+  ret void
+}